Commit 0662de68 authored by martinp16cs's avatar martinp16cs

added cblas version

parent 6de64ff4
build/
debug/
nohup.out
#include "blasJoin.h"
double blasJoin(double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS){
__builtin_assume_aligned(x, 64);
double blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE){
assert( BLOCKSIZE < N && BLOCKSIZE < 21000 );
double elapsed=0.0;
CUtilTimer timer;
double *result;
double *iresult = NULL;
// bool result[(N * N )/ 2 - N];
int joinCounts = 0;
if ( NUM_THREADS != 0 ){
omp_set_num_threads(THREADS);
mkl_set_num_threads(THREADS);
}
result = (double*) ddr_alloc(N * N);
// ah this shit does not fit into memory.
// obviously, N=200000 leads to a size of ~298GB.
// TODO: make a boolean matrix as result, and do some shifting operation to
// determine wheater we have a positive or negavite number
double *p = (double*) ddr_alloc(sizeof(double) * N );
const double EPS_SQUARED = (EPS * EPS) / 4;
// pre-compute the values P(i) and P(j)
__assume_aligned(x, ALIGNMENT);
for ( int i = 0 ; i < N ; i++ ){
p[i] = EPS_SQUARED - cblas_ddot(D, &x[i*D], 1, &x[i*D], 1) / 2.0;
}
// intermediate result
iresult = (double*) ddr_alloc(BLOCKSIZE * BLOCKSIZE * sizeof(double));
// divide N into NRC rows and NRC cols of size BLOCKSIZE^2
size_t NRC = (size_t) ceil( (double) N / BLOCKSIZE);
size_t blockRow = 0, blockCol=0;
// test BLAS:
timer.start();
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, N, N, D, 1.0, x, D, x, D, 0.0, result, N);
// lower triangle excluding the main diagonal
__assume_aligned(x, ALIGNMENT);
for ( size_t i = 1 ; i < NRC ; i++ ){
blockRow = ( i + 1 == NRC ) ? N - (BLOCKSIZE * i ) : BLOCKSIZE;
for ( size_t j = 0 ; j < i ; j++ ){
blockCol = ( j + 1 == NRC ) ? N - (BLOCKSIZE * (j + 1) ) : BLOCKSIZE;
// perform regular matrix multiplication
// C := alpha*A*B' + beta*C
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, blockRow, blockCol, D, 1.0, x, D, x, D, 0.0, iresult, blockCol);
int accum=0;
// count join partners on whole matrix
#pragma omp parallel for reduction(+:accum) collapse(2)
for ( int k = 0 ; k < blockRow ; k++ ){
for ( int l =0 ; l < blockCol ; l++ ){
// if (i*BLOCKSIZE + l >= N)
// printf("%d * %d + %d\n", i, BLOCKSIZE, l);
assert(i*BLOCKSIZE+k < N);
assert(j*BLOCKSIZE+l < N);
// if (k*blockCol + l >= BLOCKSIZE * BLOCKSIZE )
// printf("%d * %d + %d, %d, %d\n", k, blockCol, l, blockRow, NRC);
assert(k*blockCol + l < BLOCKSIZE * BLOCKSIZE);
iresult[k*blockCol + l] += ( p[i*BLOCKSIZE+k] + p[j*BLOCKSIZE+l] );
// change sign of double to 0 or 1
accum += ( (long long) iresult[k * blockCol + l] ) >> 63;
// accum += copysign(1.0,iresult[k * blockCol + l]);
}
}
joinCounts += accum;
}
}
// main diagonal
__assume_aligned(x, ALIGNMENT);
for ( size_t i = 0 ; i < NRC ; i++ ){
blockRow = ( i + 1 == NRC ) ? N - (BLOCKSIZE * i ) : BLOCKSIZE;
// perform symmetric matrix multiplication on lower triangle only
// C := alpha*A*A' + beta*C
// printf("%d %d %d\n", i, blockRow, NRC);fflush(stdout);
cblas_dsyrk(CblasRowMajor, CblasLower, CblasNoTrans, blockRow, D, 1.0, x + BLOCKSIZE * D * i, D, 0.0, iresult, blockRow);
int accum=0;
// count join partners on lower triangle
#pragma omp parallel for reduction(+:accum) collapse(2)
for ( int k = 0 ; k < blockRow ; k++ ){
for ( int l =0 ; l < k ; l++ ){
assert(i*BLOCKSIZE + l < N);
assert(i*BLOCKSIZE + k < N);
assert(k*blockCol + l < BLOCKSIZE * BLOCKSIZE);
iresult[k*blockCol + l] += ( p[i*BLOCKSIZE + l] + p[i*BLOCKSIZE+k] );
// change sign of double to 0 or 1
accum += ( (long long) iresult[k * NRC + l] ) >> 63;
// accum += copysign(1.0,iresult[k * blockCol + l]);
}
}
joinCounts += accum;
}
timer.stop();
ddr_free(result);
joinCounts = ((N * N) / 2) - N + joinCounts;
#ifdef DEBUG
printf("joinCounts: %d\n", joinCounts);
#endif
ddr_free(iresult);
ddr_free(p);
return timer.get_time();
}
......@@ -6,12 +6,13 @@
#include <assert.h>
#include <string.h>
#include <omp.h>
#include <math.h>
#include "mkl.h"
#include "../util/allocation.h"
#include "../util/timer.h"
double blasJoin(double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS);
double blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE);
#endif
......@@ -3,24 +3,24 @@
#include "arguments.h"
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, size_t *threads, char *filename, bool isBinary){
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, size_t *threads, size_t *blocksize, char *filename, bool isBinary){
char c;
FILE *file;
if ( argc < 4 ){
fprintf (stderr, "The parameters are obligatory.\n");
if ( argc < 6 ){
fprintf (stderr, "There are obligatory parameters.\n");
fprintf (stderr, "Usage: ./blasJoin ");
fprintf(stderr, "Obligatory parameters: \n");
fprintf(stderr, "n (number of objects in thousands (*1000))\ne (epsilon)\nd (dimensionality)\n");
fprintf(stderr, "n (number of objects in thousands (*1000))\ne (epsilon)\nd (dimensionality)\ns (blocksize)");
fprintf(stderr, "Optional parameters: \n t number of threads\n\n");
fprintf(stderr, "f (filename) if there is no filename we use random generated data [0.0, 100.0)\n");
fprintf(stderr, "b use the -b argument without options to specify that it is a binary file.\n");
fprintf(stderr, "Example (with default values): ./blasMeans -n 200 -e 0.2 -d 20 -t 64\n");
fprintf(stderr, "Example (with default values): ./blasJoin -n 200 -e 0.2 -d 20 -s 5000 -t 64\n");
exit(1);
}
while ((c = getopt(argc, argv, "bn:e:d:t:f:")) != -1) {
while ((c = getopt(argc, argv, "bn:e:d:t:s:f:")) != -1) {
if ( optarg ){
switch(c){
case 'n':
......@@ -41,12 +41,15 @@ void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d,
case 'b':
isBinary = true;
break;
case 's':
*blocksize = atol(optarg);
break;
case '?':
if (optopt == 'c')
fprintf (stderr, "Option -%c requires an argument.\n", optopt);
else if (isprint (optopt)){
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
exit(1);
exit(1);
}
else
fprintf (stderr,
......
......@@ -8,6 +8,6 @@
#include <string.h>
#include <ctype.h>
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, size_t *threads, char *filename, bool isBinary);
void parsing_args(int argc, char* argv[], size_t *n, double *epsilon, size_t *d, size_t *threads, size_t *blocksize, char *filename, bool isBinary);
#endif //KMEANS_ARGS_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment