Commit cd5affa9 authored by perdacherMartin's avatar perdacherMartin

added file io

parent 392ba5ce
.DS_Store
build/
CMakeCache.txt
CMakeFiles
CMakeScripts
......
......@@ -3,7 +3,7 @@ project(blasMeans)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(SOURCE_FILES main.cpp util/allocation.cpp util/arguments.cpp util/timer.cpp bmeans/blasmeans.cpp)
set(SOURCE_FILES main.cpp util/allocation.cpp util/arguments.cpp util/timer.cpp util/dataIo.cpp bmeans/blasmeans.cpp)
SET(CMAKE_C_COMPILER gcc)
SET(CMAKE_CXX_COMPILER g++)
......
# BlasMeans
Uses the BLAS (Basic Linear Algebra Subprograms) dgemm operation to compute the distances in K-means.
# Prerequisites
1) Set your MKL environment variables
(see [MKL ](https://software.intel.com/en-us/articles/intel-mkl-103-getting-started))
Example:
```
source /opt/intel/mkl/bin/mklvars.sh intel64
```
2) A working CMake installation
(see [CMake website](https://cmake.org/))
# CMake Usage
```
......
......@@ -5,6 +5,7 @@
#include "util/allocation.h"
#include "util/arguments.h"
#include "util/dataIo.h"
#include "bmeans/blasmeans.h"
......@@ -12,12 +13,14 @@ int main(int argc, char** argv) {
long *members = NULL;
double *x = NULL;
CUtilTimer timer;
char filename[] = "";
double elapsed=0.0;
bool isBinary = false;
int threads = 0;
int N=64, K=40, D=20;
parsing_args(argc,argv, &N, &K, &D, &threads);
parsing_args(argc,argv, &N, &K, &D, &threads, filename, isBinary);
if ( threads != 0 ){
omp_set_num_threads(threads);
......@@ -32,8 +35,38 @@ int main(int argc, char** argv) {
random_init(x,n,d);
printf("start:\n");
elapsed = blasMeans( members, x, d, k, n, threads);
for ( int i = 0 ; i < 2 ; i++ ){
for ( int j = 0 ; j < D ; j++ ){
printf("%f, ", x[i*D + j]);
}
printf("\n");
}
char test_file[] = "matrix.txt";
// save_binary_file(x,n,d,test_file);
save_text_file(x,n,d,test_file);
memset(x, 0, sizeof(double) * n * d );
// read_file(x,n,d,test_file, true);
read_file(x,n,d,test_file, false);
printf("\n\n\n\n");
for ( int i = 0 ; i < 2 ; i++ ){
for ( int j = 0 ; j < D ; j++ ){
printf("%f, ", x[i*D + j]);
}
printf("\n");
}
// if ( filename == "" ){
// random_init(x,n,d);
// }else{
// read_file(x, n, d, filename, isBinary);
// }
//
// elapsed = blasMeans( members, x, d, k, n, threads);
printf("%d; %d; %d; %f\n", n,d,k,elapsed);
......
......@@ -16,14 +16,3 @@ void * ddr_alloc(size_t bytes){
// Releases the block pointed to by `ptrs` by handing it to _mm_free.
// NOTE(review): presumably `ptrs` must come from ddr_alloc (whose body is not
// visible here) so that allocator and deallocator match -- confirm.
void ddr_free(void *ptrs){
_mm_free(ptrs);
}
// Fills the first N * D entries of `array` with pseudo-random doubles in
// [0.0, 1.0) produced by erand48.
//
// array : destination buffer with room for at least N * D doubles
// N, D  : matrix dimensions (rows, columns); nothing is written if N * D <= 0
//
// erand48 reads and updates a generator state of THREE unsigned shorts. The
// state is therefore a full 3-element array; passing the address of a single
// short lets erand48 access memory past the variable.
//
// firstprivate gives every OpenMP thread its own copy of the same initial
// state, so each thread starts from an identical sequence.
void random_init(double *array, const int N, const int D){
    unsigned short seed[3] = {3, 0, 0};
    // Widen before multiplying so large N * D cannot overflow int.
    const long long total = (long long) N * (long long) D;
    long long i;

    #pragma omp parallel for firstprivate(seed)
    for ( i = 0 ; i < total ; i++ ){
        array[i] = erand48(seed);
    }
}
......@@ -2,8 +2,6 @@
#ifndef ALLOCATION_H
#define ALLOCATION_H
#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#if defined(__INTEL_COMPILER)
......@@ -12,11 +10,9 @@
#include <mm_malloc.h>
#endif
#define ALIGNMENT 64
void * ddr_alloc(size_t bytes);
void ddr_free(void * ptrs);
void random_init(double *array, const int N, const int D);
#endif
......@@ -3,7 +3,7 @@
#include "arguments.h"
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads){
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads, char *filename, bool isBinary){
char c;
FILE *file;
......@@ -13,12 +13,14 @@ void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads){
fprintf(stderr, "Obligatory parameters: \n");
fprintf(stderr, "n (number of objects in millions)\nk (number of clusters)\nd (dimensionality)\n");
fprintf(stderr, "Optional parameter: \n t number of threads\n\n");
fprintf(stderr, "Optional parameters: \n t number of threads\n\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 1.0)\n");
fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): ./blasMeans -n 64 -k 40 -d 20 -t 4\n");
exit(1);
}
while ((c = getopt(argc, argv, "n:k:d:t:")) != -1) {
while ((c = getopt(argc, argv, "bn:k:d:t:f:")) != -1) {
if ( optarg ){
switch(c){
case 'n':
......@@ -33,6 +35,22 @@ void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads){
case 'k':
*k = atoi(optarg);
break;
case 'f':
strcpy(filename, optarg);
break;
case 'b':
isBinary = true;
break;
case '?':
if (optopt == 'c')
fprintf (stderr, "Option -%c requires an argument.\n", optopt);
else if (isprint (optopt))
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
else
fprintf (stderr,
"Unknown option character `\\x%x'.\n",
optopt);
exit(1);
default:
break;
}
......
......@@ -5,7 +5,9 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads);
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads, char *filename, bool isBinary);
#endif //KMEANS_ARGS_H
#include "dataIo.h"
// Fills the first N * D entries of `array` with pseudo-random doubles in
// [0.0, 1.0) produced by erand48.
//
// array : destination buffer with room for at least N * D doubles
// N, D  : matrix dimensions (rows, columns); nothing is written if N * D <= 0
//
// erand48 reads and updates a generator state of THREE unsigned shorts. The
// state is therefore a full 3-element array; passing the address of a single
// short lets erand48 access memory past the variable.
//
// firstprivate gives every OpenMP thread its own copy of the same initial
// state, so each thread starts from an identical sequence.
void random_init(double *array, const int N, const int D){
    unsigned short seed[3] = {3, 0, 0};
    // Widen before multiplying so large N * D cannot overflow int.
    const long long total = (long long) N * (long long) D;
    long long i;

    #pragma omp parallel for firstprivate(seed)
    for ( i = 0 ; i < total ; i++ ){
        array[i] = erand48(seed);
    }
}
// Loads an N x D row-major matrix of doubles from `filename` into `array`.
//
// array    : destination buffer with room for N * D doubles
// N, D     : number of rows / values per row; if either is <= 0 nothing is read
// filename : path of the file to read
// isBinary : true  -> the file holds N * D raw doubles, read in one call
//            false -> text file, one row per line, values separated by
//                     whitespace; lines whose first character is '%' are
//                     comments
//
// Exits with status 1 if the file cannot be opened or a binary file holds
// fewer than N * D doubles.
//
// Text mode details:
//  * lines are read with getline, so rows of any length are handled instead of
//    being split at a fixed buffer size;
//  * space, tab, CR and LF all separate values, so the line terminator is
//    never parsed as a value;
//  * blank lines are skipped and do not consume a row;
//  * a row with fewer than D values leaves the remaining entries untouched;
//  * a token that is not a number is stored as 0.0 (atof semantics).
void read_file(double *array, const int N, const int D, char filename[], bool isBinary){
    // "rb" keeps the byte stream untranslated on platforms that distinguish
    // text and binary streams.
    FILE *fp = fopen(filename, isBinary ? "rb" : "r");
    if ( fp == NULL ){
        fprintf(stderr, "File '%s' does not exists!", filename);
        exit(1);
    }

    // An empty matrix needs no data; do not report it as a format error.
    if ( N <= 0 || D <= 0 ){
        fclose(fp);
        return;
    }

    const size_t rows = (size_t) N;
    const size_t cols = (size_t) D;

    if ( isBinary ){
        const size_t total = rows * cols;
        if ( fread(array, sizeof(double), total, fp) != total ){
            fprintf(stderr, "Binary file '%s' could not be read. Wrong format.", filename);
            fclose(fp);
            exit(1);
        }
    }else{
        const char delims[] = " \t\r\n";
        char *line = NULL;      // buffer owned by getline, released below
        size_t capacity = 0;
        size_t row = 0;

        while ( row < rows && getline(&line, &capacity, fp) != -1 ){
            if ( line[0] == '%' ){
                continue;       // comment line
            }
            char *token = strtok(line, delims);
            if ( token == NULL ){
                continue;       // blank line
            }
            size_t col = 0;
            while ( token != NULL && col < cols ){
                array[row * cols + col] = atof(token); // 0.0 if no valid conversion
                token = strtok(NULL, delims);
                col++;
            }
            row++;
        }
        free(line);
    }
    fclose(fp);
}
// Writes an N x D row-major matrix of doubles to `filename` as raw binary
// data (N * D doubles in memory order, no header).
//
// array    : source buffer holding at least N * D doubles
// N, D     : matrix dimensions; if either is <= 0 an empty file is written
// filename : path of the file to create or overwrite
//
// Exits with status 1 if the file cannot be opened or not all data could be
// written.
void save_binary_file(double *array, const int N, const int D, char filename[]){
    // "wb" keeps the byte stream untranslated on platforms that distinguish
    // text and binary streams.
    FILE *fp = fopen(filename, "wb");
    if ( fp == NULL ){
        fprintf(stderr, "Could not open file '%s'!", filename);
        exit(1);
    }

    const size_t total = ( N > 0 && D > 0 ) ? (size_t) N * (size_t) D : 0;

    // Compare against the element count: fwrite legitimately returns 0 when
    // asked to write zero elements, which is not an error.
    const bool shortWrite = fwrite(array, sizeof(double), total, fp) != total;

    // fclose flushes buffered data, so a failure here also means data was lost.
    const bool closeFailed = fclose(fp) != 0;

    if ( shortWrite || closeFailed ){
        fprintf(stderr, "Error in writing file '%s'. Abort.", filename);
        exit(1);
    }
}
// Writes an N x D row-major matrix of doubles to `filename` as text.
//
// array    : source buffer holding at least N * D doubles
// N, D     : number of rows / values per row
// filename : path of the file to create or overwrite
//
// Output format: one row per line; every value is printed with "%f" and is
// followed by a single space (including the last value of a row), then '\n'.
//
// Values are streamed straight to the file rather than assembled in
// fixed-size buffers, so neither a large D nor a value with many digits can
// overflow anything.
//
// Exits with status 1 if the file cannot be opened or a write error occurs.
void save_text_file(double *array, const int N, const int D, char filename[]){
    FILE *fp = fopen(filename, "w");
    if ( fp == NULL ){
        fprintf(stderr, "Could not open file '%s'!", filename);
        exit(1);
    }

    for ( int row = 0 ; row < N ; row++ ){
        for ( int col = 0 ; col < D ; col++ ){
            // size_t arithmetic keeps the index from overflowing int
            fprintf(fp, "%f ", array[(size_t) row * (size_t) D + (size_t) col]);
        }
        fputc('\n', fp);
    }

    // Check the stream state before closing; fclose flushes buffered data, so
    // its result matters as well.
    const bool writeFailed = ferror(fp) != 0;
    const bool closeFailed = fclose(fp) != 0;
    if ( writeFailed || closeFailed ){
        fprintf(stderr, "Error in writing file '%s'. Abort.", filename);
        exit(1);
    }
}
// Data input/output helpers: random initialisation of a matrix and
// reading/writing it from/to binary or text files.
// All functions treat `array` as N * D doubles addressed as array[i*D + j].
#ifndef DATA_IO_H
#define DATA_IO_H
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define MAX_LINE_LENGTH 2049
// Fills the first N * D entries of `array` with values generated by erand48.
void random_init(double *array, const int N, const int D);
// Reads matrix data from `filename` into `array`.
// isBinary == true : the file content is read as raw doubles.
// isBinary == false: text file, one row per line, values separated by spaces;
//                    lines starting with '%' are skipped.
// Terminates the program with exit(1) if the file cannot be opened.
void read_file(double *array, const int N, const int D, char filename[], bool isBinary);
// Writes the N * D doubles of `array` to `filename` as raw binary data.
// Terminates the program with exit(1) if the file cannot be opened or written.
void save_binary_file(double *array, const int N, const int D, char filename[]);
// Writes `array` to `filename` as text: one row per line, each value printed
// with "%f" followed by a space.
// Terminates the program with exit(1) if the file cannot be opened.
void save_text_file(double *array, const int N, const int D, char filename[]);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment