Commit bf9c9379 authored by Martin Perdacher's avatar Martin Perdacher

update .csv format

parent 09415979
......@@ -51,8 +51,10 @@ add_executable(hilbertSelfJoinCountOnly ${SOURCE_FILES_SELF})
target_compile_definitions(hilbertJoinCountOnly PRIVATE -DCOUNT_ONLY)
target_compile_definitions(hilbertSelfJoinCountOnly PRIVATE -DCOUNT_ONLY)
# target_compile_definitions(hilbertJoinCountOnly PRIVATE -DOUTPUT)
# target_compile_definitions(hilbertSelfJoinCountOnly PRIVATE -DOUTPUT)
## for a non-verbose version comment out the next two lines
target_compile_definitions(hilbertJoinCountOnly PRIVATE -DOUTPUT)
target_compile_definitions(hilbertSelfJoinCountOnly PRIVATE -DOUTPUT)
if ($ENV{KBLOCK})
target_compile_definitions(hilbertJoinCountOnly PRIVATE -DKBLOCK=$ENV{KBLOCK})
......
......@@ -12,7 +12,7 @@
#include "../util/chrisutil.h"
#include "../util/dataIo.h"
#include "../measure/timer.h"
#include "../measure/energy.h"
// #include "../measure/energy.h"
#include "hilloop.h"
#include <boost/lockfree/queue.hpp>
......
......@@ -8,7 +8,7 @@
#include <math.h>
#include "measure/timer.h"
#include "measure/energy.h"
// #include "measure/energy.h"
#include "util/dataIo.h"
#include "util/chrisutil.h"
#include "util/arguments.h"
......@@ -37,51 +37,49 @@ int main(int argc, char** argv) {
size_t threads=64;
double epsilon = 0.034;
char filename[256] = "";
bool isBinary=false;
CUtilTimer timer, algtimer;
// Hioki pmeter;
size_t result=0l;
int stripes=14;
int actdim=3;
boost::lockfree::queue<join_pair> queue(10000);
double sortTime=0.0, reorderTime=0.0, indexTime=0.0,
double sortTime=0.0, reorderTime=0.0, indexTime=0.0;
double watthours=0.0;
double totaltime=0.0,algtime=0.0;
double loadpercent=0.0;
parsing_args(argc, argv, &n, &epsilon, &d, filename, &isBinary, &actdim);
parsing_args(argc, argv, &n, &epsilon, &d, filename, &actdim);
stripes = ((int)pow(3,actdim) + 1) / 2;
omp_set_num_threads(NUM_THREADS);
int *reorder_dim=(int*) malloc ((d+8)*sizeof(int));
// #ifndef COUNT_ONLY
// printf("COUNT_ONLY is not defined.");
// #endif
//
// #ifdef COUNT_ONLY
// printf("COUNT_ONLY is defined.");
// #endif
// printf("Using %d threads!\n", NUM_THREADS);
// omp_set_num_threads(threads);
// double * array = (double*) mallocA64((n+7)/8*8 * sizeof (double) * d + 16384);
// double * array = (double*) mallocA64(n * sizeof (double) * d + 16384);
// double * array = (double*) ddr_alloc((n+7)/8*8 * sizeof(double) * d + 16384);
double * array = (double*) ddr_alloc(n * sizeof (double) * d + 16384);
// printf("alloc ok\n"); fflush(stdout);
read_file(array, n, d, filename, isBinary);
// printf("readfile ok\n"); fflush(stdout);
for ( int i=0 ; i < 10 ; i++ ){
for ( int j=0 ; j < d ; j++ ){
printf("%f, ", array[i*d+j]);
}
printf("\n");
if ( strcmp(filename,"" ) == 0) {
random_init_unif(array,n,d,1);
// random_init_8_selective(x1,n,d,1);
}else{
read_file(array, n, d, filename);
}
//// dummy output of the data which have been read
// for ( int i=0 ; i < 5 ; i++ ){
// for ( int j=0 ; j < d ; j++ ){
// printf("%f, ", array[i*d+j]);
// }
// printf("\n");
// }
char outfile1[256] = "outfile1.csv";
save_text_file(array, n,d,outfile1);
// pmeter.reset(); pmeter.start();
timer.start();
// reordering dimensions, proposed in
// Dmitri V. Kalashnikov: Super-EGO: fast multi-dimensional similarity join. VLDB J. 22(4): 561-585 (2013)
outputStatistics(n, d, epsilon, array, reorder_dim);
// sampleHistograms(n, d, epsilon, array, reorder_dim);
reorder_dimensions(n, d, array, reorder_dim);
......@@ -89,9 +87,7 @@ int main(int argc, char** argv) {
timer.stop();
reorderTime = timer.get_time();
// test_ego_loop3(n,d,threads,epsilon,array,&result);
// printf("start\n"); fflush(stdout);
// test_ego_loop3_long(n,d,threads,epsilon,array,&result,stripes,KBLOCK);
algtimer.start();
#ifdef COUNT_ONLY
test_ego_loop3_macro(n,d,epsilon,array,&result,stripes,&sortTime,&indexTime,&loadpercent);
......@@ -105,11 +101,11 @@ int main(int argc, char** argv) {
algtimer.stop();
algtime = algtimer.get_time();
// pmeter.stop();
// watthours=pmeter.getWH();
#ifndef COUNT_ONLY
// if we materialize with a non-blocking linked list, then joincounts are zero
#pragma omp parallel for
for ( int i = 0 ; i < threads ; i++ ){
consumer(queue);
......@@ -118,8 +114,9 @@ int main(int argc, char** argv) {
result = consumer_count;
#endif
double jp_per_point = (result == 0 ) ? 0 : (double)result / n ;
// HEADER:
// N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH
printf("N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH\n");
printf("%zu;%zu;%f;%zu;%2.14f;%d;%d;%f;%f;%f;%f;%f;%ld;%f;%f\n", n,d,jp_per_point, NUM_THREADS,epsilon,stripes,KBLOCK,algtime+reorderTime,algtime - sortTime,sortTime,indexTime,reorderTime,result,loadpercent,watthours);
// freeA64(array);
ddr_free(array);
......
......@@ -39,7 +39,6 @@ int main(int argc, char** argv) {
double epsilon = 0.034;
char filename[256] = "";
char filename2[256] = "";
bool isBinary=false;
CUtilTimer timer, algtimer;
// Hioki pmeter;
size_t result=0l;
......@@ -49,7 +48,7 @@ int main(int argc, char** argv) {
double sortTime=0.0, reorderTime=0.0, indexTime=0.0, watthours=0.0,totaltime=0.0,algtime=0.0;
double loadpercent=0.0;
parsing_args_join(argc, argv, &n, &m, &epsilon, &d, filename, filename2, &isBinary,&actdim);
parsing_args_join(argc, argv, &n, &m, &epsilon, &d, filename, filename2,&actdim);
stripes = ((int)pow(3,actdim) + 1) / 2;
omp_set_num_threads(NUM_THREADS);
......@@ -58,12 +57,12 @@ int main(int argc, char** argv) {
double *x1 = (double*) ddr_alloc(n * sizeof (double) * d + 16384);
double *x2 = (double*) ddr_alloc(m * sizeof (double) * d + 16384);
// if ( strcmp(filename,"" ) == 0) {
// // random_init_unif(x1,n,d,1);
// random_init_8_selective(x1,n,d,1);
// }else{
// read_file(x1, n, d, filename, isBinary);
// }
if ( strcmp(filename,"" ) == 0) {
random_init_unif(x1,n,d,1);
// random_init_8_selective(x1,n,d,1);
}else{
read_file(x1, n, d, filename);
}
// char filenameA[256];
// sprintf(filenameA, "selective8_dims_A_%d_%d_normalized.bin", n,d);
......@@ -71,10 +70,10 @@ int main(int argc, char** argv) {
// printf("savedA\n");fflush(stdout);
if ( strcmp(filename2,"" ) == 0) {
// random_init_unif(x2,m,d,2);
random_init_8_selective(x2,m,d,2);
random_init_unif(x2,m,d,2);
// random_init_8_selective(x2,m,d,2);
}else{
read_file(x2, m, d, filename, isBinary);
read_file(x2, m, d, filename);
}
// char filenameB[256];
......@@ -92,6 +91,8 @@ int main(int argc, char** argv) {
// pmeter.reset(); pmeter.start();
timer.start();
// reordering dimensions, proposed in
// Dmitri V. Kalashnikov: Super-EGO: fast multi-dimensional similarity join. VLDB J. 22(4): 561-585 (2013)
outputStatistics(n, d, epsilon, x1, reorder_dim);
// sampleHistograms(n, d, epsilon, array, reorder_dim);
reorder_dimensions(n, d, x1, reorder_dim);
......@@ -133,6 +134,7 @@ int main(int argc, char** argv) {
double jp_per_point = (result == 0 ) ? 0 : (double)result / n ;
// HEADER:
// N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH
printf("N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH\n");
printf("%zu;%zu;%zu;%f;%zu;%2.14f;%d;%d;%f;%f;%f;%f;%f;%ld;%f;%f\n", n,m,d,jp_per_point, NUM_THREADS,epsilon,stripes,KBLOCK,algtime+reorderTime,algtime - sortTime,sortTime,indexTime,reorderTime,result,loadpercent,watthours);
ddr_free(x1);
......
......@@ -3,25 +3,25 @@
#include "arguments.h"
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, char *filename, bool *isBinary, int *activedims){
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, char *filename, int *activedims){
char c;
FILE *file;
if ( argc < 5 ){
fprintf (stderr, "There are obligatory parameters.\n");
fprintf (stderr, "Usage: ./egoHilb (or ./egoCano)");
fprintf (stderr, "Usage: ./hilbertSelfJoinCountOnly (or ./egoCano)");
fprintf(stderr, "Obligatory parameters: \n");
fprintf(stderr, "n (number of objects )\ne (epsilon)\nd (dimensionality)\n");
fprintf(stderr, "Optional parameters: \n\n");
fprintf(stderr, "a number of acitve dimensions (default 3)\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 100.0)\n");
fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): hilbertSelfJoinCountOnly -n 200 -e 0.2 -d 64 -t 64\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 1.0)\n");
// fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): ./hilbertSelfJoinCountOnly -n 200000 -e 0.2 -d 64 -t 64\n");
exit(1);
}
while ( (c = getopt(argc, argv, "n:e:d:t:f:k:s:b") ) != -1) {
while ( (c = getopt(argc, argv, "n:e:d:t:f:k:s:") ) != -1) {
if ( optarg ){
switch(c){
......@@ -57,9 +57,9 @@ void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d,
}
}else{
switch(c){
case 'b':
*isBinary = true;
break;
// case 'b':
// *isBinary = true;
// break;
case '?':
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
exit(1);
......@@ -78,7 +78,7 @@ void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d,
}
void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *epsilon, size_t *d, char *filename, char *filename2, bool *isBinary, int *activedims){
void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *epsilon, size_t *d, char *filename, char *filename2, int *activedims){
char c;
FILE *file;
......@@ -92,12 +92,12 @@ void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *eps
fprintf(stderr, "a number of active dimensions (default 3)\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 1.0)\n");
fprintf(stderr, "g (filename set B) if there is no filename we use random generated data [0.0, 1.0)\n");
fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
// fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): ./hilbertJoinCountOnly -n 200000 -m 200000 -e 0.2 -d 20 -t 64\n");
exit(1);
}
while ( (c = getopt(argc, argv, "n:m:e:d:t:f:g:k:a:b") ) != -1) {
while ( (c = getopt(argc, argv, "n:m:e:d:t:f:g:k:a:") ) != -1) {
if ( optarg ){
switch(c){
......@@ -139,9 +139,9 @@ void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *eps
}
}else{
switch(c){
case 'b':
*isBinary = true;
break;
// case 'b':
// *isBinary = true;
// break;
case '?':
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
exit(1);
......
......@@ -8,8 +8,8 @@
#include <string.h>
#include <ctype.h>
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, char *filename, bool *isBinary, int *stripes);
void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *epsilon, size_t *d, char *filename, char *filename2, bool *isBinary, int *stripes);
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, char *filename, int *stripes);
void parsing_args_join(int argc, char* argv[], size_t *n, size_t *m, double *epsilon, size_t *d, char *filename, char *filename2, int *stripes);
#endif //KMEANS_ARGS_H
......@@ -21,6 +21,8 @@ void random_init_unif(double *array, const int N, const int D, const int INIT_SE
// const int imax = (ME == ALL_THREADS - 1 ) ? N : (N / ALL_THREADS) * (ME+1);
// const int MY_N = imax - imin;
errcode = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1 * D, &array[0], LOWER_BOUND, UPPER_BOUND); // avoid first value always 0
errcode = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N * D, &array[0], LOWER_BOUND, UPPER_BOUND);
if ( errcode != VSL_ERROR_OK && errcode != VSL_STATUS_OK ){
......@@ -49,7 +51,7 @@ void random_init_8_selective(double *array, const int N, const int D, const int
// const int imax = (ME == ALL_THREADS - 1 ) ? N : (N / ALL_THREADS) * (ME+1);
// const int MY_N = imax - imin;
errcode = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1 * D, &array[0], LOWER_BOUND, UPPER_BOUND); // first value always 0
errcode = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1 * D, &array[0], LOWER_BOUND, UPPER_BOUND); // avoid first value always 0
errcode = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, N * D, &array[0], LOWER_BOUND, UPPER_BOUND);
......@@ -97,13 +99,24 @@ void random_init(double *array, const int N, const int D){
}
}
void read_file(double *array, const int N, const int D, char filename[], const bool IS_BINARY){
/*
 * Tells whether the C string `str` terminates with the C string `suffix`.
 *
 * Returns 1 when the last strlen(suffix) characters of `str` equal `suffix`,
 * and 0 otherwise. A NULL `str` or NULL `suffix` yields 0. An empty suffix
 * matches every non-NULL string.
 */
int stringEndsWith(const char *str, const char *suffix)
{
    if (str == NULL || suffix == NULL) {
        return 0;
    }

    const size_t strLength = strlen(str);
    const size_t suffixLength = strlen(suffix);

    /* A suffix longer than the string itself can never match. */
    if (strLength < suffixLength) {
        return 0;
    }

    /* Compare only the trailing window of `str` that lines up with `suffix`. */
    const char *tail = str + (strLength - suffixLength);
    if (strncmp(tail, suffix, suffixLength) == 0) {
        return 1;
    }
    return 0;
}
void read_file(double *array, const int N, const int D, char filename[]){
FILE *fp;
size_t counts = 0;
size_t i=0,j=0;
char line[MAX_LINE_LENGTH];
char *token=NULL;
const char space[2] = " ";
const char space[2] = ",";
fp = fopen(filename,"r");
......@@ -112,10 +125,10 @@ void read_file(double *array, const int N, const int D, char filename[], const b
exit(1);
}
if ( IS_BINARY ){
// printf("processing binary file!");fflush(stdout);
// read binary file, everything at once
// printf("binary");
if ( stringEndsWith(filename, ".bin" ) ){
#ifdef OUTPUT
printf("reading binary file '%s'\n", filename);
#endif
counts = fread(array, N * D, sizeof(double), fp);
// printf("%dx%d: %d readed\n", N, D, counts);
if ( counts == 0 ) {
......@@ -123,11 +136,13 @@ void read_file(double *array, const int N, const int D, char filename[], const b
exit(1);
}
}else{
printf("not binary");
#ifdef OUTPUT
printf("reading text file '%s'\n", filename);
#endif
// processing a text file
// format: each line holds D double values, separated by a comma (CSV).
// notice MAX_LINE_LENGTH = 2049
// printf("processing text file!");fflush(stdout);
i = 0;
while ( fgets ( line, MAX_LINE_LENGTH, fp ) != NULL &&
i < N ) {
......@@ -188,12 +203,14 @@ void save_text_file(double *array, const int N, const int D, char filename[]){
strcpy(line, "");
for ( j=0 ; j < D ; j++ ){
strcpy(strDouble, "");
sprintf(strDouble, "%f ", array[i*D + j]);
sprintf(strDouble, "%f", array[i*D + j]);
if ( j+1 < D ){
strcat(strDouble, ",");
}
strcat(line, strDouble);
}
fprintf(fp, "%s\n", line);
}
fclose(fp);
......
......@@ -12,10 +12,11 @@
#define MAX_LINE_LENGTH 2049
int stringEndsWith(const char *str, const char *suffix);
void random_init(double *array, const int N, const int D);
void random_init_8_selective(double *array, const int N, const int D, const int INIT_SEED);
void random_init_unif(double *array, const int N, const int D, const int INIT_SEED);
void read_file(double *array, const int N, const int D, char filename[], const bool IS_BINARY);
void read_file(double *array, const int N, const int D, char filename[]);
void save_binary_file(double *array, const int N, const int D, char filename[]);
void save_text_file(double *array, const int N, const int D, char filename[]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment