Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
hilbertJoin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Perdacher
hilbertJoin
Commits
3217279a
Commit
3217279a
authored
Apr 25, 2019
by
Martin Perdacher
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
gcc v7.3 bugfix
parent
380c9204
Pipeline
#543
passed with stage
in 2 minutes and 34 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
24 additions
and
24 deletions
+24
-24
CMakeLists.txt
CMakeLists.txt
+1
-1
hilbertjoin/egojoin.cpp
hilbertjoin/egojoin.cpp
+16
-16
hilbertjoin/egojoin.h
hilbertjoin/egojoin.h
+3
-3
main.cpp
main.cpp
+2
-2
mainJoin.cpp
mainJoin.cpp
+2
-2
No files found.
CMakeLists.txt
View file @
3217279a
...
...
@@ -21,7 +21,7 @@ include_directories($ENV{MKLROOT}/include)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -march=knl -mtune=knl -ffast-math -fassociative-math -O3 -fopenmp -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5 -lboost_system")
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11 -march=knl -mtune=knl -ffast-math -fassociative-math -O3 -fopenmp -l
mkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11 -march=knl -mtune=knl -ffast-math -fassociative-math -O3 -fopenmp -l
boost_system -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl
"
)
# xeon-phi
link_directories
(
$ENV{MKLROOT}/lib/intel64
)
...
...
hilbertjoin/egojoin.cpp
View file @
3217279a
...
...
@@ -61,12 +61,12 @@ void sampleHistograms(int n, int d, double epsilon, double *array, int *reorder_
for
(
int
j
=
0
;
j
<
sizes
[
i
];
j
++
)
{
costref
[
i
]
+=
histos
[
h
]
*
(
histos
[
h
]
-
1
)
/
2
+
(
j
>
0
?
histos
[
h
-
1
]
*
histos
[
h
]
:
0
);
#ifdef OUTPUT
printf
(
"%ld "
,
histos
[
h
]);
printf
(
"%l
l
d "
,
histos
[
h
]);
#endif
h
++
;
}
#ifdef OUTPUT
printf
(
" => %ld
\n
"
,
costref
[
i
]);
printf
(
" => %l
l
d
\n
"
,
costref
[
i
]);
#endif
}
int
*
reorder_rev
=
(
int
*
)
malloc
((
d
+
8
)
*
sizeof
(
int
));
...
...
@@ -75,7 +75,7 @@ void sampleHistograms(int n, int d, double epsilon, double *array, int *reorder_
qsort
(
reorder_rev
,
d
,
sizeof
(
int
),
cmp_reorder_dim
);
#ifdef OUTPUT
for
(
int
j
=
0
;
j
<
d
+
8
;
j
++
)
printf
(
"%2d %2d %ld
\n
"
,
j
,
reorder_rev
[
j
],
j
<
d
?
costref
[
reorder_rev
[
j
]]
:
0
);
printf
(
"%2d %2d %l
l
d
\n
"
,
j
,
reorder_rev
[
j
],
j
<
d
?
costref
[
reorder_rev
[
j
]]
:
0
);
#endif
// reorder_dim = (int*) malloc((d + 8) * sizeof (int));
for
(
int
j
=
0
;
j
<
d
+
8
;
j
++
)
...
...
@@ -108,7 +108,7 @@ int test_ego_loop(size_t n, size_t d, double epsilon, double *array, long long *
EGO_END
#ifdef OUTPUT
printf
(
"count of join partners: %d
\n
"
,
iresult
);
printf
(
"count of join partners: %
ll
d
\n
"
,
iresult
);
#endif
*
result
=
iresult
;
...
...
@@ -135,14 +135,14 @@ int test_ego_loop3(size_t n, size_t d, double epsilon, double *array, long long
-
_mm512_srli_epi64
(
_mm512_castpd_si512
(
sum8
),
63
)
;
}
EGO_CONSOLIDATE
{
iresult
+=
_mm512_reduce_add_epi64
(
resultvec
);
iresult
+=
_
custom_
mm512_reduce_add_epi64
(
resultvec
);
// double testres[8] __attribute__((aligned(64)));
// _mm512_store_epi64(testres, resultvec);
// printf("par = %d: %d %d\n", par, result, testres[0]+testres[1]+testres[2]+testres[3]+testres[4]+testres[5]+testres[6]+testres[7]);
}
EGO_END_TRAN
#ifdef OUTPUT
printf
(
"result %d
\n
"
,
iresult
);
printf
(
"result %
ll
d
\n
"
,
iresult
);
#endif
*
result
=
iresult
;
}
...
...
@@ -464,7 +464,7 @@ void prepareStripes(size_t n, size_t d, int numStripes, double epsilon, double *
// }
}
static
inline
long
long
_mm512_reduce_add_epi64
(
__m512i
a
){
static
inline
long
long
_
custom_
mm512_reduce_add_epi64
(
__m512i
a
){
// __m256i low = _mm512_cvtepi64_epi32(a);
// low = _mm256_hadd_epi32(low, low);
// __m128i ulow = _mm_hadd_epi32(_mm256_castsi256_si128(low),_mm256_castsi256_si128(low));
...
...
@@ -565,10 +565,10 @@ void outputStatistics(int n, int d, double epsilon, double *array, int *reorder_
ref
+=
histo
[
i
-
1
]
*
histo
[
i
];
costref
[
j
]
=
ref
;
#ifdef OUTPUT
printf
(
"%3d %8d %8d %8d %20l
d [%
ld"
,
j
,
(
int
)
first_int
,
(
int
)
last_int
,
size
,
ref
,
histo
[
0
]);
printf
(
"%3d %8d %8d %8d %20l
ld [%l
ld"
,
j
,
(
int
)
first_int
,
(
int
)
last_int
,
size
,
ref
,
histo
[
0
]);
for
(
int
i
=
1
;
i
<
size
&&
i
<
10
;
i
++
)
printf
(
", %ld"
,
histo
[
i
]);
printf
(
", %l
l
d"
,
histo
[
i
]);
if
(
size
>
10
)
printf
(
", ...] %d %d
\n
"
,
testcount
,
testcount2
);
else
...
...
@@ -583,7 +583,7 @@ void outputStatistics(int n, int d, double epsilon, double *array, int *reorder_
qsort
(
reorder_rev
,
d
,
sizeof
(
int
),
cmp_reorder_dim
);
#ifdef OUTPUT
for
(
int
j
=
0
;
j
<
d
+
8
;
j
++
)
printf
(
"%2d %2d %ld
\n
"
,
j
,
reorder_rev
[
j
],
j
<
d
?
costref
[
reorder_rev
[
j
]]
:
0
);
printf
(
"%2d %2d %l
l
d
\n
"
,
j
,
reorder_rev
[
j
],
j
<
d
?
costref
[
reorder_rev
[
j
]]
:
0
);
#endif
// reorder_dim = (int*) malloc ((d+8)*sizeof(int));
for
(
int
j
=
0
;
j
<
d
+
8
;
j
++
)
...
...
@@ -793,7 +793,7 @@ void test_ego_loop3_macro(size_t n, size_t d, double epsilon, double *array, siz
*
sortTime
=
sortTimer
.
get_time
();
// printf("timestamp index ready %6.2f\n",timestamp()-starttimestamp);
#ifdef OUTPUT
printf
(
"overall_load: %l
d / %
ld (=n*(n-1)/2 / 64) ==> %f
\n
"
,
overall_load
,
(
long
long
)
n
/
128
*
(
n
-
1
),
(
double
)
overall_load
/
n
/
(
n
-
1
)
*
128
);
printf
(
"overall_load: %l
ld / %l
ld (=n*(n-1)/2 / 64) ==> %f
\n
"
,
overall_load
,
(
long
long
)
n
/
128
*
(
n
-
1
),
(
double
)
overall_load
/
n
/
(
n
-
1
)
*
128
);
#endif
*
loadpercent
=
(
double
)
overall_load
/
n
/
(
n
-
1
)
*
128
;
#pragma omp parallel for reduction(+:result) reduction(+:refinements)
...
...
@@ -813,7 +813,7 @@ void test_ego_loop3_macro(size_t n, size_t d, double epsilon, double *array, siz
refineload
++
;
}
EGO_CONSOLIDATE
{
result
+=
_mm512_reduce_add_epi64
(
resultvec
);
result
+=
_
custom_
mm512_reduce_add_epi64
(
resultvec
);
refinements
+=
refineload
;
int
curload
=
0
;
for
(
int
i
=
loadstart
[
par
]
;
i
<
loadstart
[
par
+
1
]
;
i
++
)
...
...
@@ -821,7 +821,7 @@ void test_ego_loop3_macro(size_t n, size_t d, double epsilon, double *array, siz
curload
+=
upper
[
s
][
i
+
nn
/
8
]
-
lower
[
s
][
i
+
nn
/
8
];
total_timer
.
stop
();
#ifdef OUTPUT
printf
(
"Consolidate %6.2f %d %d %d %d %d %l
d %
ld
\n
"
,
total_timer
.
get_time
(),
par
,
omp_get_thread_num
(),
loadstart
[
par
],
loadstart
[
par
+
1
]
-
loadstart
[
par
],
curload
,
refineload
,
result
);
printf
(
"Consolidate %6.2f %d %d %d %d %d %l
ld %l
ld
\n
"
,
total_timer
.
get_time
(),
par
,
omp_get_thread_num
(),
loadstart
[
par
],
loadstart
[
par
+
1
]
-
loadstart
[
par
],
curload
,
refineload
,
result
);
#endif
// double testres[8] __attribute__((aligned(64)));
...
...
@@ -881,7 +881,7 @@ void test_ego_loop3_noself(const size_t nA, const size_t nB, const int d, const
refineload
++
;
}
EGO_CONSOLIDATE
{
result
+=
_mm512_reduce_add_epi64
(
resultvec
);
result
+=
_
custom_
mm512_reduce_add_epi64
(
resultvec
);
refinements
+=
refineload
;
int
curload
=
0
;
for
(
int
i
=
loadstart
[
par
]
;
i
<
loadstart
[
par
+
1
]
;
i
++
)
...
...
@@ -889,7 +889,7 @@ void test_ego_loop3_noself(const size_t nA, const size_t nB, const int d, const
curload
+=
upper
[
s
][
i
+
nn
/
8
]
-
lower
[
s
][
i
+
nn
/
8
];
total_timer
.
stop
();
#ifdef OUTPUT
printf
(
"Consolidate %6.2f %d %d %d %d %d %l
d %
ld
\n
"
,
timestamp
()
-
starttimestamp
,
par
,
omp_get_thread_num
(),
loadstart
[
par
],
loadstart
[
par
+
1
]
-
loadstart
[
par
],
curload
,
refineload
,
result
);
printf
(
"Consolidate %6.2f %d %d %d %d %d %l
ld %l
ld
\n
"
,
timestamp
()
-
starttimestamp
,
par
,
omp_get_thread_num
(),
loadstart
[
par
],
loadstart
[
par
+
1
]
-
loadstart
[
par
],
curload
,
refineload
,
result
);
#endif
// double testres[8] __attribute__((aligned(64)));
...
...
@@ -904,7 +904,7 @@ void test_ego_loop3_noself(const size_t nA, const size_t nB, const int d, const
// for(int par=0 ; par<NUM_THREADS ; par++, printf("\n"))
// for(int s=0 ; s<5 ; s++)
// printf("%ld ",savedload[NUM_THREADS*s+par]);
// printf("%ld ",savedload[NUM_THREADS*s+par]);
}
...
...
hilbertjoin/egojoin.h
View file @
3217279a
...
...
@@ -49,7 +49,7 @@ void epsilonGridCompleteListMax(size_t n, int *list);
void
epsilonGridCompleteListMin
(
size_t
n
,
int
*
list
);
static
inline
void
transpose_8xd
(
size_t
n
,
size_t
d
,
double
*
EGO_array
);
void
prepareStripes
(
size_t
n
,
size_t
d
,
int
numStripes
,
double
epsilon
,
double
*
array
,
int
**
lower
,
int
**
upper
,
double
*
self
);
static
inline
long
long
_mm512_reduce_add_epi64
(
__m512i
a
);
static
inline
long
long
_
custom_
mm512_reduce_add_epi64
(
__m512i
a
);
static
inline
void
transpose_dx8
(
size_t
n
,
size_t
d
,
double
*
EGO_array
);
void
omp_qsort
(
void
*
l
,
size_t
num
,
size_t
size
,
int
(
*
compar
)(
const
void
*
,
const
void
*
));
// void test_ego_loop3_macro(size_t n, size_t d, size_t NUM_THREADS, double epsilon, double *array, size_t *countresult, int stripes, int KBLOCK, double *sorttime);
...
...
@@ -356,7 +356,7 @@ extern long long * costref;
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum6), 63);\
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum7), 63);\
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum8), 63);\
if(_mm512_reduce_add_epi64(allind) >= 64) {k=d+1; break;}\
if(_
custom_
mm512_reduce_add_epi64(allind) >= 64) {k=d+1; break;}\
vi = _mm512_load_pd(self + (i * EGO_blocks + k/KBLOCK) * 8);\
vj = _mm512_load_pd(self + (j * EGO_blocks + k/KBLOCK) * 8);\
sum1 += vi + _mm512_permutexvar_pd(const0, vj);\
...
...
@@ -527,7 +527,7 @@ extern long long * costref;
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum6), 63);\
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum7), 63);\
allind += _mm512_srli_epi64(_mm512_castpd_si512(sum8), 63);\
if(_mm512_reduce_add_epi64(allind) >= 64) {k=d+1; break;}\
if(_
custom_
mm512_reduce_add_epi64(allind) >= 64) {k=d+1; break;}\
vi = _mm512_load_pd(selfA + (i * EGO_blocks + k/KBLOCK) * 8);\
vj = _mm512_load_pd(selfB + (j * EGO_blocks + k/KBLOCK) * 8);\
sum1 += vi + _mm512_permutexvar_pd(const0, vj);\
...
...
main.cpp
View file @
3217279a
...
...
@@ -118,8 +118,8 @@ int main(int argc, char** argv) {
double
jp_per_point
=
(
result
==
0
)
?
0
:
(
double
)
result
/
n
;
// HEADER:
printf
(
"N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH
\n
"
);
printf
(
"%zu;%zu;%f;%
zu;%2.14f;%d;%d;%f;%f;%f;%f;%f;%ld
;%f;%f
\n
"
,
n
,
d
,
jp_per_point
,
NUM_THREADS
,
epsilon
,
stripes
,
KBLOCK
,
algtime
+
reorderTime
,
algtime
-
sortTime
,
sortTime
,
indexTime
,
reorderTime
,
result
,
loadpercent
,
watthours
);
//
printf("N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH\n");
printf
(
"%zu;%zu;%f;%
d;%2.14f;%d;%d;%f;%f;%f;%f;%f;%zu
;%f;%f
\n
"
,
n
,
d
,
jp_per_point
,
NUM_THREADS
,
epsilon
,
stripes
,
KBLOCK
,
algtime
+
reorderTime
,
algtime
-
sortTime
,
sortTime
,
indexTime
,
reorderTime
,
result
,
loadpercent
,
watthours
);
// freeA64(array);
ddr_free
(
array
);
free
(
reorder_dim
);
...
...
mainJoin.cpp
View file @
3217279a
...
...
@@ -135,8 +135,8 @@ int main(int argc, char** argv) {
double
jp_per_point
=
(
result
==
0
)
?
0
:
(
double
)
result
/
n
;
// HEADER:
// N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH
printf
(
"N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH
\n
"
);
printf
(
"%zu;%zu;%zu;%f;%
zu
;%2.14f;%d;%d;%f;%f;%f;%f;%f;%ld;%f;%f
\n
"
,
n
,
m
,
d
,
jp_per_point
,
NUM_THREADS
,
epsilon
,
stripes
,
KBLOCK
,
algtime
+
reorderTime
,
algtime
-
sortTime
,
sortTime
,
indexTime
,
reorderTime
,
result
,
loadpercent
,
watthours
);
//
printf("N;D;JPPP;THREADS;EPSILON;STRIPES;KBLOCK;TIME;ALGTIME;SORTTIME;INDEXTIME;REORDERTIME;COUNTS;LOADPERCENT;WH\n");
printf
(
"%zu;%zu;%zu;%f;%
d
;%2.14f;%d;%d;%f;%f;%f;%f;%f;%ld;%f;%f
\n
"
,
n
,
m
,
d
,
jp_per_point
,
NUM_THREADS
,
epsilon
,
stripes
,
KBLOCK
,
algtime
+
reorderTime
,
algtime
-
sortTime
,
sortTime
,
indexTime
,
reorderTime
,
result
,
loadpercent
,
watthours
);
ddr_free
(
x1
);
ddr_free
(
x2
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment