diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ba38fdc4bb7f1b7173d66c647c5ad64077c734a7..ae0ac4130c5a1959db63001343241663ef1ff137 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -13,9 +13,10 @@ set(SOURCE_FILES_JOIN mainJoin.cpp blasJoin/blasJoin.cpp ${UTIL_SOURCES}) set(CMAKE_CXX_FLAGS "-std=c++11") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=knl -mtune=knl -fpic -ffast-math -DNDEBUG -O3 -DNDDEBUG -fopenmp -lboost_system -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl") + # some issues with GNU, use Intel Compiler instead + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=skylake-avx512 -fpic -ffast-math -DNDEBUG -O3 -DNDDEBUG -fopenmp -lboost_system -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xmic-avx512 -qopenmp -DNDEBUG -O3 -lmkl_intel_thread -liomp5 -lpthread -lmkl_core -lmkl_intel_lp64 -lboost_system") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCore-AVX512 -qopenmp -DNDEBUG -O3 -lmkl_intel_thread -liomp5 -lpthread -lmkl_core -lmkl_intel_lp64 -lboost_system") # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qopt-report=2") add_definitions(-DDEBUG) endif() diff --git a/cmake/blasJoin/blasJoin.cpp b/cmake/blasJoin/blasJoin.cpp index 905e5fd0482f297e3d8ad9e47fa70ef87774ddda..638723fbf03e5acb67e65d3e36825e65f6371412 100644 --- a/cmake/blasJoin/blasJoin.cpp +++ b/cmake/blasJoin/blasJoin.cpp @@ -47,6 +47,7 @@ void blasSelfJoinCountOnly(const double *x, const size_t N, const size_t D, cons // perform regular matrix multiplication // C := alpha*A*B' + beta*C + // printf("blockRow: %zu, blockCol: %zu, D:%zu\n", blockRow, blockCol, D); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, blockRow, blockCol, D, 1.0, &x[i*BLOCKSIZE*D], D, &x[j*BLOCKSIZE*D], D, 0.0, iresult, blockCol); /*