Commit 7b766831 authored by Ernst Naschenweng

C++ code bsc thesis

parents
# Build script for the Bachelor_Thesis executable.
cmake_minimum_required(VERSION 3.8)
project(Bachelor_Thesis)
# Compile all sources as C++11.
set(CMAKE_CXX_STANDARD 11)
# Alternative, longer flag set; currently disabled.
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost -auto-p32 -fast -fp-model precise -no-prec-div -ipo -O3 -qopt-report=2 -qopt-report-phase=ipo -fimf-precision=low -parallel -qopt-prefetch-distance=64,32 -ffast-math -qopenmp")
# Active flags, appended to whatever CMAKE_CXX_FLAGS already contains.
# NOTE(review): -xHost / -qopenmp / -ipo look like Intel compiler options - confirm the
# intended toolchain; other compilers may not accept them.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost -fast -no-prec-div -ipo -O3 -qopenmp")
# Translation units that are linked into the single executable.
set(SOURCE_FILES main.cpp fw.cpp hilloop.cpp)
add_executable(Bachelor_Thesis ${SOURCE_FILES})
This diff is collapsed.
/*
* Author: Ernst Naschenweng
* Bachelor's Thesis in Scientific Computing
* University of Vienna
*/
#ifndef BACHELOR_THESIS_FW_H
#define BACHELOR_THESIS_FW_H
#include <chrono>
#include <string>
// Stopwatch used by the benchmarks: the clock starts when the object is created;
// end() stops it and returns the time.
struct Timer {
    explicit Timer(std::string message);
    std::string end();

private:
    std::string message;
    std::chrono::time_point<std::chrono::system_clock> start;
};
// Contains the distance matrix and the Floyd-Warshall methods, as well as a reference
// implementation in Boost.
// Read from and write to the distance matrix with at(x, y).
// The result of the shortest-path computation overwrites the original distance matrix.
// "FUR" refers to the Hilbert curve, "ZUR" to the Z-curve.
struct Matrix {
private:
    int size_;
    // Raw buffer holding the matrix entries.
    // NOTE(review): presumably allocated in the constructor and released in ~Matrix();
    // the definitions are not in this header - confirm.
    double* data_;
public:
    explicit Matrix(int size);
    ~Matrix();

    // Copying is disabled: the implicitly generated copy operations would only duplicate
    // the data_ pointer, leaving two objects that refer to (and, if the destructor frees
    // it, both release) the same buffer.
    Matrix(const Matrix&) = delete;
    Matrix& operator=(const Matrix&) = delete;

    // Read and write access to the x/y coordinate in the matrix.
    // NOTE(review): declared inline, but no definition is visible in this header - confirm
    // that every translation unit calling at() can see the definition.
    inline double& at(int x, int y);

    // Verification helpers: copyData() fills the caller-provided array (the benchmark driver
    // uses it to back up the Boost result), verifyAgainstBoost() checks the matrix against
    // such a backup.
    void copyData(double boost[]);
    bool verifyAgainstBoost(const double data[]);

    // Fills the matrix with generated data.
    // NOTE(review): presumably random edge weights in [minWeight, maxWeight] with the given
    // density and seed - confirm the exact semantics in the implementation.
    void generateData(double minWeight, double maxWeight, unsigned short density, unsigned int seed);
    bool isReachable(int x, int y);

    // Non-blocked Floyd-Warshall variants. Each returns a std::string that the benchmark
    // driver prints (presumably the Timer message - confirm in the implementation).
    std::string simple_FW();
    std::string simple_FW_FUR(); // hilbert
    std::string simple_FW_ZUR(); // z-curve
    std::string simple_FW_AVX(bool avx512); // if false, regular AVX is used (256 bit)
    std::string boost_FW();

    // Blocked variant, based on research by Venkataraman et al.
    // (source: https://www.cise.ufl.edu/~sahni/papers/shortc.pdf)
    std::string blocked_FW(int blockFactor);
    void blocked_FW_phase1(int blocksize, int currentBlock);
    void blocked_FW_phase2_horizontal(int blocksize, int currentBlock, int pivotBlock);
    void blocked_FW_phase2_vertical(int blocksize, int currentBlock, int pivotBlock);
    void blocked_FW_phase3(int blocksize, int currentCol, int currentRow, int pivotBlock);

    // Blocked variant using the Hilbert curve ("FUR"); the phases additionally receive the
    // arrays i_ and j_.
    std::string blocked_FW_FUR(int blockFactor);
    void blocked_FW_FUR_phase1(int blocksize, int currentBlock, int i_[], int j_[]);
    void blocked_FW_FUR_phase2_horizontal(int blocksize, int currentBlock, int pivotBlock, int i_[], int j_[]);
    void blocked_FW_FUR_phase2_vertical(int blocksize, int currentBlock, int pivotBlock, int i_[], int j_[]);
    void blocked_FW_FUR_phase3(int blocksize, int currentCol, int currentRow, int pivotBlock, int i_[], int j_[]);

    // Blocked variant using the Z-curve ("ZUR").
    std::string blocked_FW_ZUR(int blockFactor);

    // Blocked AVX variants; use_avx512 selects between AVX-512 and regular AVX.
    std::string blocked_FW_AVX(int blockFactor, bool use_avx512);
    void blocked_FW_AVX_phase1(int blocksize, int currentBlock, bool use_avx512);
    void blocked_FW_AVX_phase2_horizontal(int blocksize, int currentBlock, int pivotBlock, bool use_avx512);
    void blocked_FW_AVX_phase2_vertical(int blocksize, int currentBlock, int pivotBlock, bool use_avx512);
    void blocked_FW_AVX_phase3(int blocksize, int currentCol, int currentRow, int pivotBlock, bool use_avx512);

    // Blocked AVX variant that additionally takes a thread count for OpenMP.
    std::string blocked_FW_AVX_OMP(int blockFactor, int threads, bool use_avx512);
};
#endif //BACHELOR_THESIS_FW_H
This diff is collapsed.
This diff is collapsed.
/*
* Author: Ernst Naschenweng
* Bachelor's Thesis in Scientific Computing
* University of Vienna
*/
#include <iostream>
#include "fw.h"
// Maps a verification result to the label printed after each benchmark line:
// "OK" for true, "ERROR" for false.
std::string wrapBoolToTextOutput(bool verifyResult) {
    return verifyResult ? "OK" : "ERROR";
}
// CAREFUL! command line arguments are not sanity checked or sanitized
// argv[1] = matrix size n
// argv[2] = how often to loop the benchmark
// argv[3] = blocking factor
// argv[4] = number of threads for omp
// example call: ./a.out 512 5 9 64
int main(int argc, char* argv[]) {
int n = std::atoi(argv[1]);
int runs = std::atoi(argv[2]);
bool use_avx512 = true;
double minWeight = 2;
double maxWeight = 70;
unsigned short density = 80;
unsigned int seed = 3729;
Matrix* data = new Matrix(n);
for(int r = 0; r < runs; ++r) {
std::cout << "matrix size: " << n << std::endl;
std::cout << "run: " << r+1 << std::endl;
// Boost reference implementation
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->boost_FW() << std::endl;
// backup boost result for verification
double* boost = new double[n*n]();
data->copyData(boost);
// Single threaded, unoptimized Floyd-Warshall
data->generateData(minWeight, maxWeight, density, seed);
std::cout << data->simple_FW();
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Single threaded, FUR-optimized Floyd-Warshall
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->simple_FW_FUR();
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Single threaded, unoptimized, AVX Floyd-Warshall
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->simple_FW_AVX(use_avx512);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
int blockFactor = std::atoi(argv[3]);
int threads = std::atoi(argv[4]);
// Blocked Floyd-Warshall
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->blocked_FW(blockFactor);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Blocked Floyd-Warshall FUR
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->blocked_FW_FUR(blockFactor);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Blocked Floyd-Warshall AVX
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->blocked_FW_AVX(blockFactor, use_avx512);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Blocked Floyd-Warshall AVX OMP
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->blocked_FW_AVX_OMP(blockFactor, threads, use_avx512);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Single threaded, ZUR-optimized Floyd-Warshall
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->simple_FW_ZUR();
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
// Blocked Floyd-Warshall ZUR
data->generateData(minWeight, maxWeight, density, seed);
std:: cout << data->blocked_FW_ZUR(blockFactor);
std::cout << " - verify: " << wrapBoolToTextOutput(data->verifyAgainstBoost(boost)) << std::endl;
std::cout << "-end run " << r+1 << "-" << std::endl;
}
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment