Commit 049ac6fc authored by Martin Perdacher

added non-blocking l-list

parent 5333e66b
cmake_minimum_required(VERSION 3.6)
project(blasJoin)
set(UTIL_SOURCES util/timer.cpp util/dataIo.cpp util/arguments.cpp util/allocation.cpp)
set(UTIL_SOURCES util/dataIo.cpp util/arguments.cpp util/allocation.cpp measure/energy.cpp measure/papicalls.cpp measure/timer.cpp)
set(SOURCE_FILES main.cpp blasJoin/blasJoin.cpp ${UTIL_SOURCES})
#####################
# build type: Release
#####################
# NDEBUG turns off asserts
set(CMAKE_CXX_FLAGS "-std=c++11 ")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=knl -mtune=knl -fpic -ffast-math -DNDEBUG -O3 -DNDDEBUG -fopenmp -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=knl -mtune=knl -fpic -ffast-math -DNDEBUG -O3 -DNDDEBUG -fopenmp -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5 -lboost_system")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xmic-avx512 -qopenmp -qopt-report=2 -DNDEBUG -O3 -liomp5 -lpthread -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xmic-avx512 -qopenmp -DNDEBUG -O3 -lmkl_intel_thread -liomp5 -lpthread -lmkl_core -lmkl_intel_lp64 -lboost_system")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qopt-report=2")
# xeon phi (knl) specific:
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lmemkind")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lmemkind")
add_definitions(-DDEBUG)
endif()
......@@ -24,11 +28,12 @@ add_definitions(-DNUM_THREADS=64)
####################
# cmake -D CMAKE_BUILD_TYPE:STRING=Debug ..
# and ignore the warning: "Manually-specified variables were not used by the project: CMAKE_BUILD_TYPE"
set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -march=knl -mtune=knl -fpic -ffast-math -O0 -fopenmp")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -march=knl -mtune=knl -fpic -ffast-math -O0 -fopenmp -lboost_system")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -xmic-avx512 -fpic -qopenmp -axCOMMON-AVX512 -lmemkind -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5 -lpthread -g -debug all -save-temps -Wl, -O0 -fstack-security-check")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -xmic-avx512 -fpic -qopenmp -axCOMMON-AVX512 -lmemkind -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5 -lpthread -g -debug all -save-temps -Wl, -O0 -fstack-security-check -lboost_system")
endif()
# adding MKL include directory
......@@ -37,8 +42,10 @@ include_directories($ENV{MKLROOT}/include)
# xeon-phi
link_directories($ENV{MKLROOT}/lib/intel64)
include_directories($ENV{BOOST_HOME}/include/boost)
link_directories($ENV{BOOST_HOME}/lib)
# libraries
# mkl libraries
find_library ( mkl_lp64_LIB NAMES libmkl_intel_lp64.a
PATHS $ENV{MKLROOT} PATH_SUFFIXES lib)
find_library ( mkl_core_LIB NAMES libmkl_core.a
......@@ -48,5 +55,14 @@ find_library ( mkl_thread_LIB NAMES libmkl_intel_thread.a
find_library ( mkl_omp_LIB NAMES libiomp5.a
PATHS $ENV{MKLROOT} PATH_SUFFIXES lib)
# papi library
set(PAPI_PREFIX "/usr/local")
find_library(PAPI_LIBRARIES
# Pick the static library first for easier run-time linking.
NAMES libpapi.a papi PATHS ${PAPI_PREFIX}/lib
)
add_executable(blasJoin ${SOURCE_FILES})
add_definitions(-DCOUNT_ONLY)
add_executable(blasJoinCountOnly ${SOURCE_FILES})
target_link_libraries(blasJoin ${PAPI_LIBRARIES})
......@@ -2,13 +2,12 @@
#include "blasJoin.h"
/* returns energy consumption [watthours] */
double blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE, size_t *joinCounts, CounterBin * hwcounters){
void blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE, size_t *joinCounts, CounterBin * hwcounters, boost::lockfree::queue<join_pair> &jpartners){
assert( BLOCKSIZE < N && BLOCKSIZE < 21000 && BLOCKSIZE > 1);
double elapsed=0.0;
// CUtilTimer timer;
Hioki energy_consume;
PapiBin papi_bin;
double *iresult = NULL;
......@@ -23,7 +22,6 @@ double blasJoin(const double *x, const size_t N, const size_t D, const double EP
iresult = (double*) ddr_alloc(BLOCKSIZE * BLOCKSIZE * sizeof(double));
// timer.start();
energy_consume.reset(); energy_consume.start();
papi_bin.start();
const double EPS_SQUARED = (EPS * EPS) / 4.0 ;
......@@ -64,6 +62,14 @@ double blasJoin(const double *x, const size_t N, const size_t D, const double EP
* count only join-partners (positive ones)
*/
accum += - ( (long long) floor( - ( iresult[k * BLOCKSIZE + l] + p[i*BLOCKSIZE+k] + p[j*BLOCKSIZE+l] ) ) >> 63);
#ifndef COUNT_ONLY
if ( iresult[k * BLOCKSIZE + l] + p[i*BLOCKSIZE+k] + p[j*BLOCKSIZE+l] < 0 ){
join_pair p;
p.p1 = i * BLOCKSIZE + k;
p.p2 = j * BLOCKSIZE + l;
while ( ! jpartners.push(p) );
}
#endif
}
}
......@@ -89,6 +95,14 @@ double blasJoin(const double *x, const size_t N, const size_t D, const double EP
for ( int k = 1 ; k < blockRow ; k++ ){
for ( int l = 0 ; l < k ; l++ ){
accum += - ( (long long) floor( - ( iresult[k * BLOCKSIZE + l] + p[i*BLOCKSIZE+k] + p[i*BLOCKSIZE+l] ) ) >> 63);
#ifndef COUNT_ONLY
if ( iresult[k * BLOCKSIZE + l] + p[i*BLOCKSIZE+k] + p[i*BLOCKSIZE+l] < 0 ){
join_pair p;
p.p1 = i * BLOCKSIZE + k;
p.p2 = i * BLOCKSIZE + l;
while (!jpartners.push(p));
}
#endif
}
}
*joinCounts += accum;
......@@ -97,11 +111,8 @@ double blasJoin(const double *x, const size_t N, const size_t D, const double EP
// timer.stop();
papi_bin.stop();
energy_consume.stop();
*hwcounters = papi_bin.getBin();
ddr_free(iresult);
ddr_free(p);
return energy_consume.getWH();
}
......@@ -16,7 +16,14 @@
#include "../measure/papicalls.h"
#include "../util/dataIo.h"
#include <boost/lockfree/queue.hpp>
#include <boost/atomic.hpp>
double blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE, size_t *joinCounts, CounterBin * hwcounters);
// One entry of the join result. blasJoin fills in both fields and pushes the
// struct onto the boost::lockfree::queue<join_pair> it receives, once for every
// pair it identifies as join partners (only when COUNT_ONLY is not defined).
// NOTE(review): keep this a plain struct of two size_t fields; Boost.Lockfree's
// queue presumably requires a trivially assignable/destructible element type --
// confirm against the Boost documentation before adding members or constructors.
struct join_pair {
size_t p1; // first index of the pair; set in blasJoin to i * BLOCKSIZE + k
size_t p2; // second index of the pair; set in blasJoin to j * BLOCKSIZE + l (or i * BLOCKSIZE + l in the second loop)
};
void blasJoin(const double *x, const size_t N, const size_t D, const double EPS, const unsigned int THREADS, const size_t BLOCKSIZE, size_t *joinCounts, CounterBin * hwcounters, boost::lockfree::queue<join_pair> &jpartners);
#endif
// https://software.intel.com/en-us/forums/intel-c-compiler/topic/776876
#include <stdio.h>
#include <omp.h>
#include <boost/lockfree/queue.hpp>
#include <boost/atomic.hpp>
#include "util/allocation.h"
#include "util/arguments.h"
#include "util/dataIo.h"
#include "blasJoin/blasJoin.h"
#include "measure/energy.h"
int main(int argc, char** argv) {
CounterBin hwcounters;
......@@ -21,55 +27,29 @@ int main(int argc, char** argv) {
parsing_args(argc, argv, &N, &EPS, &D, &threads, &blocksize, filename, &isBinary);
N = N * 1000;
if ( threads != 0 ){
omp_set_num_threads(threads);
}
x = (double*) ddr_alloc(sizeof (double)*N * D);
// size_t threads = 1;
// size_t N=10, D=3;
// size_t blocksize=2, joinCounts=0;
// double EPS=0.3;
// double x[30] = { 1.0000, 2.0000, 3.0000,
// 1.1000, 2.2000, 3.0000,
// 5.0000, 6.0000, 7.0000,
// 5.1000, 6.2000, 7.1000,
// 1.0000, 4.8000, 6.3000,
// 1.0000, 14.8000, 1.3000,
// 3.0000, 17.0000, 2.0000,
// 14.000, 13.2000, 1.9000,
// 14.000, 1.00000, 15.000,
// 1.000, 1.000, 1.10000
// };
boost::lockfree::queue<join_pair> queue(10000);
x = (double*) ddr_alloc(sizeof (double)* N * D);
if ( strcmp(filename,"" ) == 0) {
// printf("random_init\n");
random_init(x,N,D);
}else{
// printf("read_file\n");
read_file(x, N, D, filename, isBinary);
}
// printf("# name: A\n");
// printf("# type: matrix\n");
// printf("# rows: 8000\n");
// printf("# columns: 64\n");
//
// for ( int i = N - 8000 ; i < N ; i++ ){
// for ( int j = 0 ; j < D ; j++ ){
// printf("%f ", x[i*D + j]);
// }
// printf("\n");
// }
blasJoin( x, N, D, EPS, threads, blocksize, &joinCounts, &hwcounters, queue);
watthours = blasJoin( x, N, D, EPS, threads, blocksize, &joinCounts, &hwcounters);
#pragma omp parallel
{
if ( omp_get_thread_num() == 0 ){
printf("%ld; %ld; %f; %ld; %d; %f; %lu; %lld; %f\n", N, D, EPS, blocksize, omp_get_num_threads(), hwcounters.rtime, joinCounts, hwcounters.l1, watthours);
printf("%ld;%ld;%f;%ld;%d;%f;%lu;%f\n", N, D, EPS, blocksize, omp_get_num_threads(), hwcounters.rtime, joinCounts, hwcounters.whours);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment