Commit 73213edd authored by perdacherMartin's avatar perdacherMartin

added file io

parent ddea67a4
.DS_Store
build/
CMakeCache.txt
CMakeFiles
CMakeScripts
......
......@@ -3,7 +3,7 @@ project(MKM)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(SOURCE_FILES main.cpp util/allocation.cpp util/arguments.cpp util/timer.cpp mckm/mckm.cpp)
set(SOURCE_FILES main.cpp util/allocation.cpp util/arguments.cpp util/timer.cpp util/dataIo.cpp mckm/mckm.cpp)
SET(CMAKE_C_COMPILER gcc)
SET(CMAKE_CXX_COMPILER g++)
......
......@@ -18,6 +18,7 @@ Finally you can run MKM with its parameters, and with randomly generated data:
* d (dimensionality of the data)
* t (number of threads)
Example:
```
./MKM -n 64 -k 40 -d 20 -t 4
```
......
......@@ -4,6 +4,7 @@
#include <omp.h>
#include "util/allocation.h"
#include "util/dataIo.h"
#include "util/timer.h"
#include "util/arguments.h"
#include "mckm/mckm.h"
......@@ -13,11 +14,13 @@ int main(int argc, char** argv) {
double *means = NULL;
double *dmatrix = NULL;
CUtilTimer timer;
bool isBinary = false;
char filename[] = "";
int threads = 0;
int N=64, K=40, D=20;
parsing_args(argc,argv, &N, &K, &D, &threads);
parsing_args(argc,argv, &N, &K, &D, &threads, filename, isBinary);
if ( threads != 0 ){
omp_set_num_threads(threads);
......@@ -30,13 +33,16 @@ int main(int argc, char** argv) {
means = (double*) ddr_alloc((k + 3)/4*4 * d * sizeof (double));
dmatrix = (double*) ddr_alloc(n * d * sizeof (double));
if ( filename == "" ){
random_init(dmatrix,n,d);
}else{
read_file(dmatrix, n, d, filename, isBinary);
}
// use first k points for means
memcpy(means, dmatrix, k * d * sizeof (double));
timer.start();
block_km_final_omp( n, k, d, threads, means, dmatrix, 5);
block_km_final_omp( n, k, d, threads, means, dmatrix, 5); // perform MCKM with 5 iterations
timer.stop();
printf("%d; %d; %d; %f\n", n,d,k,timer.get_time());
......
......@@ -9,14 +9,3 @@ void * ddr_alloc(size_t bytes){
/*
 * Release a memory block by handing it to _mm_free().
 * NOTE(review): presumably the counterpart of ddr_alloc(), whose body is not
 * visible here -- confirm that ddr_alloc() obtains its memory from the
 * matching _mm_malloc() before relying on this pairing.
 */
void ddr_free(void *ptrs){
_mm_free(ptrs);
}
/*
 * Fill the first N * D entries of `array` with pseudo-random doubles produced
 * by erand48(), i.e. values in the interval [0.0, 1.0).
 *
 * array : destination buffer with room for at least N * D doubles
 * N, D  : number of rows and columns; nothing is written if N * D <= 0
 *
 * The generator state is declared firstprivate, so every OpenMP thread starts
 * from its own copy of the same initial state.
 */
void random_init(double *array, const int N, const int D){
    /* erand48() reads and updates a 48-bit state held in THREE unsigned
       shorts. Passing the address of a single short lets it access memory
       beyond that object, so the state must be a full 3-element array. */
    unsigned short seed[3] = {3, 0, 0};
    /* Compute the element count in a wider type so that large N * D products
       do not overflow int. */
    const long total = (long) N * (long) D;
    long i;

    #pragma omp parallel for firstprivate(seed)
    for ( i=0 ; i < total ; i++ ){
        array[i] = erand48(seed);
    }
}
......@@ -17,6 +17,5 @@
void * ddr_alloc(size_t bytes);
void ddr_free(void * ptrs);
void random_init(double *array, const int N, const int D);
#endif
......@@ -3,22 +3,24 @@
#include "arguments.h"
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads){
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads, char *filename, bool isBinary){
char c;
FILE *file;
if ( argc < 4 ){
fprintf (stderr, "The parameters are obligatory.\n");
fprintf (stderr, "Usage: ./MKM ");
fprintf (stderr, "Usage: ./blasMeans ");
fprintf(stderr, "Obligatory parameters: \n");
fprintf(stderr, "n (number of objects in millions)\nk (number of clusters)\nd (dimensionality)\n");
fprintf(stderr, "Optional parameter: \n t number of threads\n\n");
fprintf(stderr, "Example (with default values): ./MKM -n 64 -k 40 -d 20 -t 4\n");
fprintf(stderr, "Optional parameters: \n t number of threads\n\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 1.0)\n");
fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): ./blasMeans -n 64 -k 40 -d 20 -t 4\n");
exit(1);
}
while ((c = getopt(argc, argv, "n:k:d:t:")) != -1) {
while ((c = getopt(argc, argv, "bn:k:d:t:f:")) != -1) {
if ( optarg ){
switch(c){
case 'n':
......@@ -33,6 +35,22 @@ void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads){
case 'k':
*k = atoi(optarg);
break;
case 'f':
strcpy(filename, optarg);
break;
case 'b':
isBinary = true;
break;
case '?':
if (optopt == 'c')
fprintf (stderr, "Option -%c requires an argument.\n", optopt);
else if (isprint (optopt))
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
else
fprintf (stderr,
"Unknown option character `\\x%x'.\n",
optopt);
exit(1);
default:
break;
}
......
......@@ -5,7 +5,9 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads);
void parsing_args(int argc, char* argv[], int *n, int *k, int *d, int *threads, char *filename, bool isBinary);
#endif //KMEANS_ARGS_H
#include "dataIo.h"
/*
 * Fill the first N * D entries of `array` with pseudo-random doubles produced
 * by erand48(), i.e. values in the interval [0.0, 1.0).
 *
 * array : destination buffer with room for at least N * D doubles
 * N, D  : number of rows and columns; nothing is written if N * D <= 0
 *
 * The generator state is declared firstprivate, so every OpenMP thread starts
 * from its own copy of the same initial state.
 */
void random_init(double *array, const int N, const int D){
    /* erand48() reads and updates a 48-bit state held in THREE unsigned
       shorts. Passing the address of a single short lets it access memory
       beyond that object, so the state must be a full 3-element array. */
    unsigned short seed[3] = {3, 0, 0};
    /* Compute the element count in a wider type so that large N * D products
       do not overflow int. */
    const long total = (long) N * (long) D;
    long i;

    #pragma omp parallel for firstprivate(seed)
    for ( i=0 ; i < total ; i++ ){
        array[i] = erand48(seed);
    }
}
/*
 * Load an N x D matrix of doubles from `filename` into `array`, stored
 * row-major (row i occupies array[i*D] .. array[i*D + D - 1]).
 *
 * isBinary == true : the file is read as N * D raw doubles.
 * isBinary == false: the file is read line by line; every data line holds up
 *                    to D whitespace-separated values. Lines whose first
 *                    character is '%' and lines without any value are
 *                    skipped. Values missing from a short line and rows
 *                    missing from a short file are set to 0.0, so the buffer
 *                    never keeps uninitialized entries.
 *
 * Terminates the process with exit(1) if the file cannot be opened or if the
 * binary payload holds fewer than N * D doubles.
 *
 * Limitation: a text line longer than MAX_LINE_LENGTH - 1 characters is split
 * by fgets() and its tail is parsed as a further row.
 */
void read_file(double *array, const int N, const int D, char filename[], bool isBinary){
    FILE *fp;
    char line[MAX_LINE_LENGTH];
    /* The line terminator is a delimiter too; otherwise a trailing "\n" would
       be handed to atof() as if it were a value. */
    const char delims[] = " \t\r\n";
    const size_t rows = N > 0 ? (size_t) N : 0;
    const size_t cols = D > 0 ? (size_t) D : 0;

    /* Binary data must not go through text-mode translation. */
    fp = fopen(filename, isBinary ? "rb" : "r");
    if ( fp == NULL ){
        fprintf(stderr, "File '%s' does not exists!", filename);
        exit(1);
    }

    if ( isBinary ){
        const size_t total = rows * cols;

        /* An empty matrix is not an error; anything shorter than requested is. */
        if ( total > 0 && fread(array, sizeof(double), total, fp) != total ){
            fprintf(stderr, "Binary file '%s' could not be read. Wrong format.", filename);
            fclose(fp);
            exit(1);
        }
    }else{
        size_t row = 0;

        /* Test the row limit first so no line is consumed once the matrix is full. */
        while ( row < rows && fgets(line, MAX_LINE_LENGTH, fp) != NULL ){
            char *token;
            size_t col = 0;

            if ( line[0] == '%' ){   // ignore '%' comment lines
                continue;
            }

            token = strtok(line, delims);
            if ( token == NULL ){    // blank line: carries no row
                continue;
            }

            while ( token != NULL && col < cols ){
                array[row*cols + col] = atof(token);   // 0.0 if no valid conversion
                token = strtok(NULL, delims);
                col++;
            }
            for ( ; col < cols ; col++ ){              // pad a short line
                array[row*cols + col] = 0.0;
            }
            row++;
        }

        if ( row < rows ){
            size_t idx;

            fprintf(stderr, "Warning: file '%s' holds only %lu of %lu rows; missing rows are set to 0.0.\n",
                    filename, (unsigned long) row, (unsigned long) rows);
            for ( idx = row*cols ; idx < rows*cols ; idx++ ){
                array[idx] = 0.0;
            }
        }
    }

    fclose(fp);
}
/*
 * Write the N * D doubles stored in `array` to `filename` as raw bytes, in
 * memory order, replacing any previous content of the file.
 *
 * An empty matrix (N * D <= 0) produces an empty file and is not an error.
 * Terminates the process with exit(1) if the file cannot be opened or if not
 * all data could be written and flushed.
 */
void save_binary_file(double *array, const int N, const int D, char filename[]){
    FILE *fp = NULL;
    size_t total = 0;
    size_t written = 0;

    /* "wb": raw doubles must not go through text-mode translation. */
    fp = fopen(filename, "wb");
    if ( fp == NULL ){
        fprintf(stderr, "Could not open file '%s'!", filename);
        exit(1);
    }

    if ( N > 0 && D > 0 ){
        total = (size_t) N * (size_t) D;
    }

    /* Count elements rather than one big item, so a zero-sized matrix is
       distinguishable from a failed write. */
    written = fwrite(array, sizeof(double), total, fp);

    /* fclose() flushes buffered data, so its result belongs to the write check. */
    if ( fclose(fp) != 0 || written != total ){
        fprintf(stderr, "Error in writing file '%s'. Abort.", filename);
        exit(1);
    }
}
/*
 * Write the N x D matrix stored row-major in `array` to `filename` as text,
 * replacing any previous content of the file.
 *
 * Each row becomes one line; every value is printed with "%f" followed by a
 * single space, and the line is terminated by '\n'.
 *
 * Terminates the process with exit(1) if the file cannot be opened or if the
 * data could not be written and flushed.
 */
void save_text_file(double *array, const int N, const int D, char filename[]){
    FILE *fp = NULL;
    int failed = 0;
    int i = 0, j = 0;

    fp = fopen(filename, "w");
    if ( fp == NULL ){
        fprintf(stderr, "Could not open file '%s'!", filename);
        exit(1);
    }

    /* Values are streamed straight to the file. Assembling each line in
       fixed-size buffers overflows them for large magnitudes ("%f" of 1e100
       needs more than 100 characters) and for rows with many columns. */
    for ( i=0 ; i < N && !failed ; i++ ){
        for ( j=0 ; j < D ; j++ ){
            if ( fprintf(fp, "%f ", array[(size_t) i * (size_t) D + (size_t) j]) < 0 ){
                failed = 1;
                break;
            }
        }
        if ( !failed && fputc('\n', fp) == EOF ){
            failed = 1;
        }
    }

    /* fclose() flushes buffered data, so its result belongs to the write check. */
    if ( fclose(fp) != 0 || failed ){
        fprintf(stderr, "Error in writing file '%s'. Abort.", filename);
        exit(1);
    }
}
/* Data input/output helpers: random data generation and matrix file I/O. */
#ifndef DATA_IO_H
#define DATA_IO_H
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Size in bytes of the buffer that holds one line of a text input file. */
#define MAX_LINE_LENGTH 2049
/* Fill the first N * D entries of `array` with values returned by erand48(). */
void random_init(double *array, const int N, const int D);
/* Load data for an N x D matrix from `filename` into `array`. `isBinary`
   selects between reading raw doubles and parsing text lines of values.
   Exits the process with status 1 if the file cannot be opened.
   NOTE(review): `bool` is used without <stdbool.h>, so this header presumably
   is only included from C++ translation units -- confirm. */
void read_file(double *array, const int N, const int D, char filename[], bool isBinary);
/* Write the N * D doubles of `array` to `filename` as raw bytes.
   Exits the process with status 1 if the file cannot be opened. */
void save_binary_file(double *array, const int N, const int D, char filename[]);
/* Write `array` to `filename` as text: one line per row, each value printed
   with "%f" followed by a space. Exits the process with status 1 if the file
   cannot be opened. */
void save_text_file(double *array, const int N, const int D, char filename[]);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment