diff --git a/CMakeLists.txt b/CMakeLists.txt index 70349fa..a9e9999 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,10 @@ set(PCG_DIRECTORY ${Galois_SOURCE_DIR}/external/pcg-cpp/include) add_subdirectory(microbench) add_subdirectory(scripts) -add_subdirectory(wf4) +add_subdirectory(bfs) +add_subdirectory(cc) +add_subdirectory(include) +add_subdirectory(pagerank) # tests option(BUILD_TESTING "Build tests." ON) diff --git a/bfs/CMakeLists.txt b/bfs/CMakeLists.txt new file mode 100644 index 0000000..7f45c18 --- /dev/null +++ b/bfs/CMakeLists.txt @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +set(sources + bfs.cpp +) + +add_executable(bfs) +target_sources(bfs PRIVATE ${sources}) +target_include_directories(bfs PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${PHMAP_DIRECTORY} ${PCG_DIRECTORY} ${graph-log-sketch_SOURCE_DIR}/include) +target_link_libraries(bfs PRIVATE Galois::cusp Galois::dist_async Galois::gluon Galois::wmd Boost::program_options Scea::lib) diff --git a/bfs/bfs.cpp b/bfs/bfs.cpp new file mode 100644 index 0000000..361d28d --- /dev/null +++ b/bfs/bfs.cpp @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include "importer.hpp" +#include "stats.hpp" +#include "galois/DistGalois.h" +#include "galois/graphs/GluonSubstrate.h" +#include "galois/wmd/WMDPartitioner.h" +#include "galois/graphs/GenericPartitioners.h" +#include "galois/DTerminationDetector.h" +#include "galois/DReducible.h" +#include "galois/gstl.h" +#include "galois/runtime/SyncStructures.h" +#include "galois/runtime/Tracer.h" +#include "galois/runtime/GraphUpdateManager.h" + +namespace po = boost::program_options; + +const uint32_t infinity = std::numeric_limits::max() / 4; + +struct NodeData { + std::atomic dist_current; + uint32_t dist_old; + NodeData() : dist_current(0), dist_old(0) {} + NodeData(uint32_t cur_dist, uint32_t old_dist) + : dist_current(cur_dist), dist_old(old_dist) {} + // Copy constructor + NodeData(const NodeData& other) + : dist_current(other.dist_current.load()), dist_old(other.dist_old) {} + // Move constructor + NodeData(NodeData&& other) noexcept + : dist_current(other.dist_current.load()), + dist_old(std::exchange(other.dist_old, 0)) {} + + //~NodeData() {std::cout << "NodeData destructor called" << std::endl;} +}; + +galois::DynamicBitSet bitset_dist_current; + +#include "bfs_pull_sync.hh" + +uint64_t src_node = 0; +uint64_t maxIterations = 1000; + +typedef galois::graphs::DistLocalGraph Graph; +typedef galois::graphs::WMDGraph< + galois::graphs::ELVertex, galois::graphs::ELEdge, NodeData, void, OECPolicy> + ELGraph; +typedef typename Graph::GraphNode GNode; +std::unique_ptr> syncSubstrate; + +struct InitializeGraph { + const uint32_t& local_infinity; + uint64_t& local_src_node; + Graph* graph; + + InitializeGraph(uint64_t& _src_node, const uint32_t& _infinity, Graph* _graph) + : local_infinity(_infinity), local_src_node(_src_node), graph(_graph) {} + + static void go(Graph& _graph) { + const auto& allNodes = _graph.allNodesRange(); + galois::do_all( + galois::iterate(allNodes), InitializeGraph(src_node, 
infinity, &_graph), + galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("InitializeGraph").c_str())); + } + + void operator()(GNode src) const { + NodeData& sdata = graph->getData(src); + sdata.dist_current = + (graph->getGID(src) == local_src_node) ? 0 : local_infinity; + sdata.dist_old = + (graph->getGID(src) == local_src_node) ? 0 : local_infinity; + } +}; + +template +struct FirstItr_BFS { + Graph* graph; + + explicit FirstItr_BFS(Graph* _graph) : graph(_graph) {} + + static void go(Graph& _graph) { + uint32_t __begin, __end; + if (_graph.isLocal(src_node)) { + __begin = _graph.getLID(src_node); + __end = __begin + 1; + } else { + __begin = 0; + __end = 0; + } + syncSubstrate->set_num_round(0); + galois::do_all( + galois::iterate(__begin, __end), FirstItr_BFS{&_graph}, + galois::no_stats(), + galois::loopname(syncSubstrate->get_run_identifier("BFS").c_str())); + + syncSubstrate->sync("BFS"); + + galois::runtime::reportStat_Tsum( + "BFS", syncSubstrate->get_run_identifier("NumWorkItems"), + __end - __begin); + } + + void operator()(GNode src) const { + NodeData& snode = graph->getData(src); + snode.dist_old = snode.dist_current; + + for (auto jj : graph->edges(src)) { + GNode dst = graph->getEdgeDst(jj); + auto& dnode = graph->getData(dst); + uint32_t new_dist = 1 + snode.dist_current; + uint32_t old_dist = galois::atomicMin(dnode.dist_current, new_dist); + if (old_dist > new_dist) { + bitset_dist_current.set(dst); + } + } + } +}; + +template +struct BFS { + uint32_t local_priority; + Graph* graph; + using DGTerminatorDetector = + typename std::conditional, + galois::DGAccumulator>::type; + using DGAccumulatorTy = galois::DGAccumulator; + + DGTerminatorDetector& active_vertices; + DGAccumulatorTy& work_edges; + + BFS(uint32_t _local_priority, Graph* _graph, DGTerminatorDetector& _dga, + DGAccumulatorTy& _work_edges) + : local_priority(_local_priority), graph(_graph), active_vertices(_dga), + work_edges(_work_edges) {} + + static void 
go(Graph& _graph) { + FirstItr_BFS::go(_graph); + unsigned _num_iterations = 1; + + const auto& nodesWithEdges = _graph.allNodesRange(); + + uint32_t priority = std::numeric_limits::max(); + DGTerminatorDetector dga; + DGAccumulatorTy work_edges; + + do { + syncSubstrate->set_num_round(_num_iterations); + dga.reset(); + work_edges.reset(); + galois::do_all( + galois::iterate(nodesWithEdges), + BFS(priority, &_graph, dga, work_edges), galois::steal(), + galois::no_stats(), + galois::loopname(syncSubstrate->get_run_identifier("BFS").c_str())); + galois::runtime::getHostBarrier().wait(); + syncSubstrate->sync("BFS"); + + galois::runtime::reportStat_Tsum( + "BFS", syncSubstrate->get_run_identifier("NumWorkItems"), + static_cast(work_edges.read_local())); + + ++_num_iterations; + } while ((async || (_num_iterations < maxIterations)) && + dga.reduce(syncSubstrate->get_run_identifier())); + galois::runtime::reportStat_Tmax( + "BFS", "NumIterations_" + std::to_string(syncSubstrate->get_run_num()), + static_cast(_num_iterations)); + } + + void operator()(GNode src) const { + NodeData& snode = graph->getData(src); + auto& net = galois::runtime::getSystemNetworkInterface(); + if (snode.dist_old > snode.dist_current) { + active_vertices += 1; + + if (local_priority > snode.dist_current) { + snode.dist_old = snode.dist_current; + + for (auto jj : graph->edges(src)) { + work_edges += 1; + + GNode dst = graph->getEdgeDst(jj); + auto& dnode = graph->getData(dst); + uint32_t new_dist = 1 + snode.dist_current; + uint32_t old_dist = galois::atomicMin(dnode.dist_current, new_dist); + if (old_dist > new_dist) + bitset_dist_current.set(dst); + } + } + } + } +}; + +/******************************************************************************/ +/* Sanity check operators */ +/******************************************************************************/ + +/* Prints total number of nodes visited + max distance */ +struct BFSSanityCheck { + const uint32_t& local_infinity; + Graph* graph; 
+ + galois::DGAccumulator& DGAccumulator_sum; + galois::DGReduceMax& DGMax; + + BFSSanityCheck(const uint32_t& _infinity, Graph* _graph, + galois::DGAccumulator& dgas, + galois::DGReduceMax& dgm) + : local_infinity(_infinity), graph(_graph), DGAccumulator_sum(dgas), + DGMax(dgm) {} + + static void go(Graph& _graph, galois::DGAccumulator& dgas, + galois::DGReduceMax& dgm) { + dgas.reset(); + dgm.reset(); + + galois::do_all(galois::iterate(_graph.masterNodesRange().begin(), + _graph.masterNodesRange().end()), + BFSSanityCheck(infinity, &_graph, dgas, dgm), + galois::no_stats(), galois::loopname("BFSSanityCheck")); + + uint64_t num_visited = dgas.reduce(); + uint32_t max_distance = dgm.reduce(); + + // Only host 0 will print the info + if (galois::runtime::getSystemNetworkInterface().ID == 0) { + galois::gPrint("Number of nodes visited from source ", src_node, " is ", + num_visited, "\n"); + galois::gPrint("Max distance from source ", src_node, " is ", + max_distance, "\n"); + } + } + + void operator()(GNode src) const { + NodeData& src_data = graph->getData(src); + + if (src_data.dist_current < local_infinity) { + DGAccumulator_sum += 1; + DGMax.update(src_data.dist_current); + } + } +}; + +/******************************************************************************/ +/* Make results */ +/******************************************************************************/ + +void printUnorderedMap( + std::unordered_map>& edits, uint64_t id) { + for (const auto& pair : edits) { + std::cout << " Printing for host " << id << " src " << pair.first << " "; + for (auto dst : pair.second) { + std::cout << dst << " "; + } + std::cout << std::endl; + } +} + +void CheckGraph(std::unique_ptr& hg, + std::unordered_map>& mp) { + galois::do_all( + galois::iterate(hg->allNodesRange()), + [&](size_t lid) { + auto token = hg->getGID(lid); + std::vector edgeDst; + auto end = hg->edge_end(lid); + auto itr = hg->edge_begin(lid); + for (; itr != end; itr++) { + 
edgeDst.push_back(hg->getGID(hg->getEdgeDst(itr))); + } + std::vector edgeDstDbg; + for (auto& e : hg->edges(lid)) { + edgeDstDbg.push_back(hg->getGID(hg->getEdgeDst(e))); + } + assert(edgeDst == edgeDstDbg); + std::sort(edgeDst.begin(), edgeDst.end()); + // std::cout << token << " "; + for (auto edge : edgeDst) { + mp[token].push_back(edge); + } + }, + galois::steal()); +} + +void PrintMasterMirrorNodes(Graph& hg, uint64_t id) { + std::cout << "Master nodes on host " << id << std::endl; + for (auto node : hg.masterNodesRange()) { + std::cout << hg.getGID(node) << " "; + } + std::cout << std::endl; + std::cout << "Mirror nodes on host " << id << std::endl; + auto mirrors = hg.getMirrorNodes(); + for (auto vec : mirrors) { + for (auto node : vec) { + std::cout << node << " "; + } + } + std::cout << std::endl; +} + +const char* elGetOne(const char* line, std::uint64_t& val) { + bool found = false; + val = 0; + char c; + while ((c = *line++) != '\0' && isspace(c)) { + } + do { + if (isdigit(c)) { + found = true; + val *= 10; + val += (c - '0'); + } else if (c == '_') { + continue; + } else { + break; + } + } while ((c = *line++) != '\0' && !isspace(c)); + if (!found) + val = UINT64_MAX; + return line; +} + +void parser(const char* line, Graph& hg, + std::vector>& delta_mirrors, + std::unordered_set& mirrors) { + uint64_t src, dst; + line = elGetOne(line, src); + line = elGetOne(line, dst); + if ((hg.isOwned(src)) && (!hg.isLocal(dst))) { + uint32_t h = hg.getHostID(dst); + if (mirrors.find(dst) == mirrors.end()) { + mirrors.insert(dst); + delta_mirrors[h].push_back(dst); + } + } +} + +std::vector> +genMirrorNodes(Graph& hg, std::string filename, int batch) { + auto& net = galois::runtime::getSystemNetworkInterface(); + std::vector> delta_mirrors(net.Num); + std::unordered_set mirrors; + + for (uint32_t i = 0; i < net.Num; i++) { + std::string dynamicFile = filename + "_batch" + std::to_string(batch) + + "_host" + std::to_string(i) + ".el"; + std::ifstream 
file(dynamicFile); + std::string line; + while (std::getline(file, line)) { + parser(line.c_str(), hg, delta_mirrors, mirrors); + } + } + return delta_mirrors; +} + +int main(int argc, char* argv[]) { + std::string filename; + uint64_t src_node; + uint64_t numVertices; + uint64_t num_batches; + po::options_description desc("Allowed options"); + desc.add_options()("help", "print help info")( + "staticFile", po::value(&filename)->required(), + "Input file for initial static graph")( + "numVertices", po::value(&numVertices)->required(), + "Number of total vertices")("numBatches", + po::value(&num_batches)->required(), + "Number of Batches")( + "srcNode", po::value(&src_node)->default_value(0), + "Source node for BFS"); + + po::variables_map vm; + try { + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + if (vm.count("help")) { + std::cout << desc << "\n"; + return 1; + } + + galois::DistMemSys G; + + std::unique_ptr hg; + + hg = distLocalGraphInitialization(filename, numVertices); + syncSubstrate = gluonInitialization(hg); + + std::unordered_map> edits; + CheckGraph(hg, edits); + // printUnorderedMap(edits, galois::runtime::getSystemNetworkInterface().ID) ; + + galois::runtime::getHostBarrier().wait(); + + ELGraph* wg = dynamic_cast(hg.get()); + + auto& net = galois::runtime::getSystemNetworkInterface(); + for (int i = 0; i < num_batches; i++) { + std::vector edit_files; + std::string dynFile = "edits"; + std::string dynamicFile = dynFile + "_batch" + std::to_string(i) + "_host" + + std::to_string(net.ID) + ".el"; + edit_files.emplace_back(dynamicFile); + // IMPORTANT: CAll genMirrorNodes before creating the + // graphUpdateManager!!!!!!!! 
+ std::vector> delta_mirrors = + genMirrorNodes(*hg, dynFile, i); + galois::runtime::getHostBarrier().wait(); + graphUpdateManager + GUM(std::make_unique>( + 1, edit_files), + 100, wg); + GUM.update(); + galois::runtime::getHostBarrier().wait(); + + syncSubstrate->addDeltaMirrors(delta_mirrors); + galois::runtime::getHostBarrier().wait(); + delta_mirrors.clear(); + bitset_dist_current.resize(hg->size()); + galois::DGAccumulator DGAccumulator_sum; + galois::DGReduceMax m; + InitializeGraph::go(*hg); + galois::runtime::getHostBarrier().wait(); + { + // BENCHMARK_SCOPE("bfs-push", + // galois::runtime::getSystemNetworkInterface().ID); + std::string timer_str("Timer_" + std::to_string(i)); + galois::StatTimer StatTimer_main(timer_str.c_str(), "BFS"); + StatTimer_main.start(); + BFS::go(*hg); + StatTimer_main.stop(); + // sanity check + BFSSanityCheck::go(*hg, DGAccumulator_sum, m); + if ((i + 1) != num_batches) { + bitset_dist_current.reset(); + (*syncSubstrate).set_num_run(i + 1); + galois::runtime::getHostBarrier().wait(); + } + } + } + + return 0; +} diff --git a/bfs/bfs_pull_sync.hh b/bfs/bfs_pull_sync.hh new file mode 100644 index 0000000..a17d8cb --- /dev/null +++ b/bfs/bfs_pull_sync.hh @@ -0,0 +1,28 @@ +/* + * This file belongs to the Galois project, a C++ library for exploiting + * parallelism. The code is being released under the terms of the 3-Clause BSD + * License (a copy is located in LICENSE.txt at the top-level directory). + * + * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. + * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS + * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF + * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF + * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH + * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances + * shall University be liable for incidental, special, indirect, direct or + * consequential damages or loss of profits, interruption of business, or + * related expenses which may arise from use of Software or Documentation, + * including but not limited to those resulting from defects in Software and/or + * Documentation, or loss or inaccuracy of data of any kind. + */ +#ifndef GRAPH_LOG_SKETCH_BFS_BFS_PULL_SYNC_HH_ +#define GRAPH_LOG_SKETCH_BFS_BFS_PULL_SYNC_HH_ + +#include "galois/runtime/SyncStructures.h" + +GALOIS_SYNC_STRUCTURE_REDUCE_SET(dist_current, unsigned int); +GALOIS_SYNC_STRUCTURE_REDUCE_MIN(dist_current, unsigned int); +GALOIS_SYNC_STRUCTURE_BITSET(dist_current); + +#endif /* GRAPH_LOG_SKETCH_BFS_BFS_PULL_SYNC_HH_ */ diff --git a/bfs/importer.cpp b/bfs/importer.cpp new file mode 100644 index 0000000..e8babad --- /dev/null +++ b/bfs/importer.cpp @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +#include "galois/wmd/WMDPartitioner.h" +#include "galois/graphs/DistributedLocalGraph.h" +#include "galois/graphs/GluonSubstrate.h" +#include "galois/graphs/GenericPartitioners.h" + +template +using DistLocalGraphPtr = std::unique_ptr< + galois::graphs::WMDGraph>; +template +using DistLocalPtr = + std::unique_ptr>; +template +DistLocalPtr +distLocalGraphInitialization(std::string& inputFile, uint64_t numVertices) { + using Graph = + galois::graphs::WMDGraph; + DistLocalPtr dg; + std::vector filenames; + filenames.emplace_back(inputFile); + std::vector>> + parsers; + parsers.emplace_back( + std::make_unique>(2, filenames)); + const auto& net = galois::runtime::getSystemNetworkInterface(); + return std::make_unique(parsers, net.ID, net.Num, true, false, + numVertices, + galois::graphs::BALANCED_EDGES_OF_MASTERS); +} + +template +using DistLocalPtr = + std::unique_ptr>; +template +using DistLocalSubstratePtr = std::unique_ptr>>; +template +DistLocalSubstratePtr gluonInitialization( + std::unique_ptr>& g) { + // DistLocalGraphPtr g; + using Graph = galois::graphs::DistLocalGraph; + using Substrate = galois::graphs::GluonSubstrate; + DistLocalSubstratePtr s; + + const auto& net = galois::runtime::getSystemNetworkInterface(); + // load substrate + s = std::make_unique(*g, net.ID, net.Num, false); + return s; +} diff --git a/cc/CMakeLists.txt b/cc/CMakeLists.txt new file mode 100644 index 0000000..7cd5ea9 --- /dev/null +++ b/cc/CMakeLists.txt @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +set(sources + cc.cpp +) + +add_executable(cc) +target_sources(cc PRIVATE ${sources}) +target_include_directories(cc PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${PHMAP_DIRECTORY} ${PCG_DIRECTORY} ${graph-log-sketch_SOURCE_DIR}/include) +target_link_libraries(cc PRIVATE Galois::cusp Galois::dist_async Galois::gluon Galois::wmd Boost::program_options Scea::lib) diff --git a/cc/cc.cpp b/cc/cc.cpp new file mode 100644 index 0000000..b0cc2af --- /dev/null +++ b/cc/cc.cpp @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#include +#include +#include + +#include "importer.hpp" + +#include "galois/graphs/DistributedLocalGraph.h" +#include "galois/graphs/GluonSubstrate.h" +#include "galois/wmd/WMDPartitioner.h" +#include "galois/graphs/GenericPartitioners.h" +#include "galois/DistGalois.h" +#include "galois/DTerminationDetector.h" +#include "galois/runtime/SyncStructures.h" +#include "galois/DReducible.h" +#include "galois/gstl.h" +#include "galois/runtime/Tracer.h" +#include "galois/runtime/GraphUpdateManager.h" + +namespace po = boost::program_options; + +struct NodeData { + std::atomic comp_current; + uint32_t comp_old; + NodeData() + : comp_current(std::numeric_limits::max()), comp_old(0) {} + explicit NodeData(uint32_t x) : comp_current(x), comp_old(0) {} + NodeData(const NodeData& o) + : comp_current(o.comp_current.load()), comp_old(o.comp_old) {} +}; + +constexpr static const char* const REGION_NAME = "ConnectedComp"; + +galois::DynamicBitSet bitset_comp_current; + +typedef galois::graphs::DistLocalGraph Graph; +typedef galois::graphs::WMDGraph< + galois::graphs::ELVertex, galois::graphs::ELEdge, NodeData, void, OECPolicy> + ELGraph; +typedef typename Graph::GraphNode GNode; + +std::unique_ptr> syncSubstrate; + +uint64_t maxIterations = 1000; + +#include "cc_push_sync.hh" + +/******************************************************************************/ +/* Algorithm structures 
*/ +/******************************************************************************/ + +struct InitializeGraph { + Graph* graph; + + explicit InitializeGraph(Graph* _graph) : graph(_graph) {} + + static void go(Graph& _graph) { + const auto& allNodes = _graph.allNodesRange(); + + galois::do_all( + galois::iterate(allNodes.begin(), allNodes.end()), + InitializeGraph{&_graph}, galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("InitializeGraph").c_str())); + } + + void operator()(GNode src) const { + NodeData& sdata = graph->getData(src); + sdata.comp_current = graph->getGID(src); + sdata.comp_old = graph->getGID(src); + } +}; + +template +struct FirstItr_ConnectedComp { + Graph* graph; + explicit FirstItr_ConnectedComp(Graph* _graph) : graph(_graph) {} + + static void go(Graph& _graph) { + const auto& nodesWithEdges = _graph.allNodesWithEdgesRange(); + syncSubstrate->set_num_round(0); + galois::do_all( + galois::iterate(nodesWithEdges), FirstItr_ConnectedComp{&_graph}, + galois::steal(), galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("ConnectedComp").c_str())); + + syncSubstrate->sync("ConnectedComp"); + + galois::runtime::reportStat_Tsum( + REGION_NAME, "NumWorkItems_" + (syncSubstrate->get_run_identifier()), + _graph.allNodesRange().end() - _graph.allNodesRange().begin()); + } + + void operator()(GNode src) const { + NodeData& snode = graph->getData(src); + snode.comp_old = snode.comp_current; + + for (auto jj : graph->edges(src)) { + GNode dst = graph->getEdgeDst(jj); + auto& dnode = graph->getData(dst); + uint32_t new_dist = snode.comp_current; + uint32_t old_dist = galois::atomicMin(dnode.comp_current, new_dist); + if (old_dist > new_dist) + bitset_comp_current.set(dst); + } + } +}; + +template +struct ConnectedComp { + Graph* graph; + using DGTerminatorDetector = + typename std::conditional, + galois::DGAccumulator>::type; + + DGTerminatorDetector& active_vertices; + + ConnectedComp(Graph* _graph, 
DGTerminatorDetector& _dga) + : graph(_graph), active_vertices(_dga) {} + + static void go(Graph& _graph) { + FirstItr_ConnectedComp::go(_graph); + + unsigned _num_iterations = 1; + DGTerminatorDetector dga; + + const auto& nodesWithEdges = _graph.allNodesWithEdgesRange(); + + do { + syncSubstrate->set_num_round(_num_iterations); + dga.reset(); + galois::do_all( + galois::iterate(nodesWithEdges), ConnectedComp(&_graph, dga), + galois::no_stats(), galois::steal(), + galois::loopname( + syncSubstrate->get_run_identifier("ConnectedComp").c_str())); + + syncSubstrate->sync("ConnectedComp"); + + galois::runtime::reportStat_Tsum( + REGION_NAME, "NumWorkItems_" + (syncSubstrate->get_run_identifier()), + (uint64_t)dga.read_local()); + ++_num_iterations; + } while ((async || (_num_iterations < maxIterations)) && + dga.reduce(syncSubstrate->get_run_identifier())); + + galois::runtime::reportStat_Tmax( + REGION_NAME, + "NumIterations_" + std::to_string(syncSubstrate->get_run_num()), + (uint64_t)_num_iterations); + } + + void operator()(GNode src) const { + NodeData& snode = graph->getData(src); + + if (snode.comp_old > snode.comp_current) { + snode.comp_old = snode.comp_current; + + for (auto jj : graph->edges(src)) { + active_vertices += 1; + + GNode dst = graph->getEdgeDst(jj); + auto& dnode = graph->getData(dst); + uint32_t new_dist = snode.comp_current; + uint32_t old_dist = galois::atomicMin(dnode.comp_current, new_dist); + if (old_dist > new_dist) + bitset_comp_current.set(dst); + } + } + } +}; + +/******************************************************************************/ +/* Sanity check operators */ +/******************************************************************************/ + +/* Get/print the number of components */ +struct ConnectedCompSanityCheck { + Graph* graph; + + galois::DGAccumulator& active_vertices; + + ConnectedCompSanityCheck(Graph* _graph, galois::DGAccumulator& _dga) + : graph(_graph), active_vertices(_dga) {} + + static void go(Graph& 
_graph, galois::DGAccumulator& dga) { + dga.reset(); + + galois::do_all(galois::iterate(_graph.masterNodesRange().begin(), + _graph.masterNodesRange().end()), + ConnectedCompSanityCheck(&_graph, dga), galois::no_stats(), + galois::loopname("ConnectedCompSanityCheck")); + + uint64_t num_components = dga.reduce(); + + // Only node 0 will print the number visited + if (galois::runtime::getSystemNetworkInterface().ID == 0) { + galois::gPrint("Number of components is ", num_components, "\n"); + } + } + + /* Check if a node's component is the same as its ID. + * if yes, then increment an accumulator */ + void operator()(GNode src) const { + NodeData& src_data = graph->getData(src); + + if (src_data.comp_current == graph->getGID(src)) { + active_vertices += 1; + } + } +}; + +const char* elGetOne(const char* line, std::uint64_t& val) { + bool found = false; + val = 0; + char c; + while ((c = *line++) != '\0' && isspace(c)) { + } + do { + if (isdigit(c)) { + found = true; + val *= 10; + val += (c - '0'); + } else if (c == '_') { + continue; + } else { + break; + } + } while ((c = *line++) != '\0' && !isspace(c)); + if (!found) + val = UINT64_MAX; + return line; +} + +void parser(const char* line, Graph& hg, + std::vector>& delta_mirrors, + std::unordered_set& mirrors) { + uint64_t src, dst; + line = elGetOne(line, src); + line = elGetOne(line, dst); + if ((hg.isOwned(src)) && (!hg.isLocal(dst))) { + uint32_t h = hg.getHostID(dst); + if (mirrors.find(dst) == mirrors.end()) { + mirrors.insert(dst); + delta_mirrors[h].push_back(dst); + } + } +} + +std::vector> +genMirrorNodes(Graph& hg, std::string filename, int batch) { + auto& net = galois::runtime::getSystemNetworkInterface(); + std::vector> delta_mirrors(net.Num); + std::unordered_set mirrors; + + for (uint32_t i = 0; i < net.Num; i++) { + std::string dynamicFile = filename + "_batch" + std::to_string(batch) + + "_host" + std::to_string(i) + ".el"; + std::ifstream file(dynamicFile); + std::string line; + while 
(std::getline(file, line)) { + parser(line.c_str(), hg, delta_mirrors, mirrors); + } + } + return delta_mirrors; +} + +void PrintMasterMirrorNodes(Graph& hg, uint64_t id) { + std::cout << "Master nodes on host " << id << std::endl; + for (auto node : hg.masterNodesRange()) { + std::cout << hg.getGID(node) << " "; + } + std::cout << std::endl; + std::cout << "Mirror nodes on host " << id << std::endl; + auto mirrors = hg.getMirrorNodes(); + for (auto vec : mirrors) { + for (auto node : vec) { + std::cout << node << " "; + } + } + std::cout << std::endl; +} + +int main(int argc, char* argv[]) { + std::string filename; + uint64_t numVertices; + uint64_t num_batches; + po::options_description desc("Allowed options"); + desc.add_options()("help", "print help info")( + "staticFile", po::value(&filename)->required(), + "Input file for initial static graph")( + "numVertices", po::value(&numVertices)->required(), + "Number of total vertices")("numBatches", + po::value(&num_batches)->required(), + "Number of Batches"); + + po::variables_map vm; + try { + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + galois::DistMemSys G; + + std::unique_ptr hg; + + hg = distLocalGraphInitialization(filename, numVertices); + syncSubstrate = gluonInitialization(hg); + + bitset_comp_current.resize(hg->size()); + + galois::runtime::getHostBarrier().wait(); + + ELGraph* wg = dynamic_cast(hg.get()); + + auto& net = galois::runtime::getSystemNetworkInterface(); + + for (int i = 0; i < num_batches; i++) { + std::vector edit_files; + std::string dynFile = "edits"; + std::string dynamicFile = dynFile + "_batch" + std::to_string(i) + "_host" + + std::to_string(net.ID) + ".el"; + edit_files.emplace_back(dynamicFile); + // IMPORTANT: CAll genMirrorNodes before creating the + // graphUpdateManager!!!!!!!! 
+ std::vector> delta_mirrors = + genMirrorNodes(*hg, dynFile, i); + galois::runtime::getHostBarrier().wait(); + graphUpdateManager + GUM(std::make_unique>( + 1, edit_files), + 100, wg); + GUM.update(); + galois::runtime::getHostBarrier().wait(); + + syncSubstrate->addDeltaMirrors(delta_mirrors); + galois::runtime::getHostBarrier().wait(); + delta_mirrors.clear(); + + InitializeGraph::go((*hg)); + galois::runtime::getHostBarrier().wait(); + + galois::DGAccumulator active_vertices64; + + ConnectedComp::go(*hg); + + ConnectedCompSanityCheck::go(*hg, active_vertices64); + + if ((i + 1) != 1) { + bitset_comp_current.reset(); + + (*syncSubstrate).set_num_run(i + 1); + galois::runtime::getHostBarrier().wait(); + } + } + + return 0; +} diff --git a/cc/cc_push_sync.hh b/cc/cc_push_sync.hh new file mode 100644 index 0000000..d20ea73 --- /dev/null +++ b/cc/cc_push_sync.hh @@ -0,0 +1,29 @@ +/* + * This file belongs to the Galois project, a C++ library for exploiting + * parallelism. The code is being released under the terms of the 3-Clause BSD + * License (a copy is located in LICENSE.txt at the top-level directory). + * + * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. + * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS + * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF + * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF + * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH + * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances + * shall University be liable for incidental, special, indirect, direct or + * consequential damages or loss of profits, interruption of business, or + * related expenses which may arise from use of Software or Documentation, + * including but not limited to those resulting from defects in Software and/or + * Documentation, or loss or inaccuracy of data of any kind. + */ + +#ifndef GRAPH_LOG_SKETCH_CC_CC_PUSH_SYNC_HH_ +#define GRAPH_LOG_SKETCH_CC_CC_PUSH_SYNC_HH_ + +#include "galois/runtime/SyncStructures.h" + +GALOIS_SYNC_STRUCTURE_REDUCE_SET(comp_current, uint32_t); +GALOIS_SYNC_STRUCTURE_REDUCE_MIN(comp_current, uint32_t); +GALOIS_SYNC_STRUCTURE_BITSET(comp_current); + +#endif // GRAPH_LOG_SKETCH_CC_CC_PUSH_SYNC_HH_ diff --git a/galois b/galois index 11b843e..377d6e8 160000 --- a/galois +++ b/galois @@ -1 +1 @@ -Subproject commit 11b843e5829238e07b9317b5a3e2fe73cfd1978a +Subproject commit 377d6e8bbf1c74f24097b0ce80b392b83dcf7899 diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 0000000..5402b98 --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +# Create a library target +add_library(libdist INTERFACE) +add_library(Scea::lib ALIAS libdist) +add_dependencies(lib libdist) + +# Set include directories for the library target +target_include_directories(libdist INTERFACE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/scea) + +# Optionally, you can add additional configurations or dependencies for the library target + +install(TARGETS libdist + EXPORT GLSTargets + LIBRARY + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT shlib + ARCHIVE + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT lib + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" +) diff --git a/include/importer.hpp b/include/importer.hpp new file mode 100644 index 0000000..ed27401 --- /dev/null +++ b/include/importer.hpp @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#pragma once + +#include +#include +#include + +#include "galois/wmd/WMDPartitioner.h" +#include "galois/graphs/DistributedLocalGraph.h" +#include "galois/graphs/GluonSubstrate.h" +#include "galois/graphs/GenericPartitioners.h" + +template +using DistLocalGraphPtr = std::unique_ptr< + galois::graphs::WMDGraph>; +template +using DistLocalPtr = + std::unique_ptr>; +template +DistLocalPtr +distLocalGraphInitialization(std::string& inputFile, uint64_t numVertices) { + using Graph = + galois::graphs::WMDGraph; + DistLocalPtr dg; + std::vector filenames; + filenames.emplace_back(inputFile); + std::vector>> + parsers; + parsers.emplace_back( + std::make_unique>(2, filenames)); + const auto& net = galois::runtime::getSystemNetworkInterface(); + return std::make_unique(parsers, net.ID, net.Num, true, false, + numVertices, + galois::graphs::BALANCED_EDGES_OF_MASTERS); +} + +template +using DistLocalPtr = + std::unique_ptr>; +template +using DistLocalSubstratePtr = std::unique_ptr>>; +template +DistLocalSubstratePtr gluonInitialization( + std::unique_ptr>& g) { + // DistLocalGraphPtr g; + 
using Graph = galois::graphs::DistLocalGraph; + using Substrate = galois::graphs::GluonSubstrate; + DistLocalSubstratePtr s; + + const auto& net = galois::runtime::getSystemNetworkInterface(); + // load substrate + s = std::make_unique(*g, net.ID, net.Num, false); + return s; +} diff --git a/include/scea/algo/bc.hpp b/include/scea/algo/bc.hpp new file mode 100644 index 0000000..ef950d6 --- /dev/null +++ b/include/scea/algo/bc.hpp @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#pragma once + +#include +#include +#include +#include + +#include "algo_interface.hpp" + +namespace scea::algo { + +class BetweennessCentrality : public Algo { +public: + BetweennessCentrality() = default; + + static std::vector compute(scea::graph::MutableGraph& g) { + std::vector centralityScores(g.size(), 0.0); + + for (uint64_t s = 0; s < g.size(); ++s) { + std::vector> predecessors(g.size()); + std::vector shortestPathCount(g.size(), 0); + std::vector distance(g.size(), std::numeric_limits::max()); + + bfsShortestPaths(g, s, predecessors, shortestPathCount, distance); + + std::vector dependency(g.size(), 0.0); + + for (int i = g.size() - 1; i >= 0; --i) { + for (uint64_t pred : predecessors[i]) { + double ratio = static_cast(shortestPathCount[pred] / + shortestPathCount[i]); + dependency[pred] += (1 + dependency[i]) * ratio; + } + if (i != s) { + centralityScores[i] += dependency[i]; + } + } + } + + return centralityScores; + } + + static void bfsShortestPaths(scea::graph::MutableGraph& g, uint64_t s, + std::vector>& predecessors, + std::vector& shortestPathCount, + std::vector& distance) { + std::queue q; + q.push(s); + distance[s] = 0; + shortestPathCount[s] = 1; + + while (!q.empty()) { + uint64_t u = q.front(); + q.pop(); + g.for_each_edge(u, [&](uint64_t v) { + if (distance[v] == std::numeric_limits::max()) { + q.push(v); + distance[v] = distance[u] + 1; + } + if (distance[v] == distance[u] + 
1) { + shortestPathCount[v] += shortestPathCount[u]; + predecessors[v].push_back(u); + } + }); + } + } + + void operator()(scea::graph::MutableGraph& g) override { compute(g); } +}; + +} // namespace scea::algo diff --git a/include/scea/algo/pr.hpp b/include/scea/algo/pr.hpp new file mode 100644 index 0000000..c3be3f6 --- /dev/null +++ b/include/scea/algo/pr.hpp @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#pragma once + +#include +#include +#include +#include + +#include "algo_interface.hpp" +#include "galois/LargeArray.h" +#include "galois/AtomicHelpers.h" +#include "galois/Reduction.h" + +namespace scea::algo { + +class PageRank : public Algo { + static constexpr double DAMPING_FACTOR = 0.85; + static constexpr int MAX_ITERATIONS = 100; + static constexpr double TOLERANCE = 1.0e-4; + +public: + PageRank() = default; + + static void compute(scea::graph::MutableGraph& g) { + const uint64_t numNodes = g.size(); + std::vector> newRank(numNodes); + std::vector rank(numNodes, 1.0 / numNodes); + std::vector out_degrees(numNodes, 0); + + galois::do_all( + galois::iterate(0ul, numNodes), + [&](uint64_t i) { + g.for_each_edge(i, [&](uint64_t const&) { out_degrees[i]++; }); + }, + galois::no_stats(), galois::loopname("ComputeOutDegrees"), + galois::steal()); + + for (int iter = 0; iter < MAX_ITERATIONS; ++iter) { + std::for_each(newRank.begin(), newRank.end(), + [](std::atomic& n) { n.store(0.0); }); + + galois::do_all( + galois::iterate(0ul, numNodes), + [&](uint64_t i) { + g.for_each_edge(i, [&](uint64_t dst) { + galois::atomicAdd(newRank[dst], rank[i] / out_degrees[i]); + }); + }, + galois::no_stats(), galois::loopname("DistributeContributions"), + galois::steal()); + + galois::do_all( + galois::iterate(0ul, numNodes), + [&](uint64_t i) { + newRank[i].store((1.0 - DAMPING_FACTOR) / numNodes + + DAMPING_FACTOR * + newRank[i].load(std::memory_order_relaxed), + 
std::memory_order_relaxed); + }, + galois::no_stats(), galois::loopname("ComputeNewRanks"), + galois::steal()); + + galois::GAccumulator diff; + galois::do_all( + galois::iterate(0ul, numNodes), + [&](uint64_t i) { + diff += + std::abs(newRank[i].load(std::memory_order_relaxed) - rank[i]); + rank[i] = newRank[i].load(std::memory_order_relaxed); + }, + galois::no_stats(), galois::loopname("CheckConvergenceAndUpdate"), + galois::steal()); + + if (diff.reduce() < TOLERANCE) { + break; + } + } + } + + void operator()(scea::graph::MutableGraph& g) override { compute(g); } +}; + +} // namespace scea::algo diff --git a/include/scea/stats.hpp b/include/scea/stats.hpp index 3eea141..c2377dd 100644 --- a/include/scea/stats.hpp +++ b/include/scea/stats.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -86,13 +87,15 @@ class PerfEvent { class ScopeBenchmarker { private: + std::string scopeName; HighResTimer timer; PerfEvent cacheMissesEvent; PerfEvent cacheReferencesEvent; PerfEvent instructionsEvent; PerfEvent minorPageFaultsEvent; PerfEvent majorPageFaultsEvent; - std::string scopeName; + std::string fileName; + bool isDist = false; static uint64_t getMaxRSS() { struct rusage usage; @@ -122,6 +125,17 @@ class ScopeBenchmarker { majorPageFaultsEvent.start(); } + explicit ScopeBenchmarker(const std::string& name, + const std::string& _fileName) + : scopeName(name), + cacheMissesEvent(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES), + cacheReferencesEvent(PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CACHE_REFERENCES), + instructionsEvent(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS), + minorPageFaultsEvent(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN), + majorPageFaultsEvent(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ), + fileName(_fileName), isDist(true) {} + ~ScopeBenchmarker() { timer.stop(); cacheMissesEvent.stop(); @@ -137,16 +151,25 @@ class ScopeBenchmarker { uint64_t minorPageFaults = minorPageFaultsEvent.readValue(); uint64_t 
majorPageFaults = majorPageFaultsEvent.readValue(); - std::cout << "Benchmark results for " << scopeName << ":" << std::endl // - << "Duration: " << timer.getDurationNano() << " nanoseconds" - << std::endl // - << "Max RSS: " << max_rss << " KB" << std::endl // - << "Cache Misses: " << cacheMisses << std::endl // - << "Cache References: " << cacheReferences << std::endl // - << "Instructions: " << instructions << std::endl // - << "Minor Page Faults: " << minorPageFaults << std::endl // - << "Major Page Faults: " << majorPageFaults << std::endl; + std::string output = + "Benchmark results for " + scopeName + ":\n" + + "Duration: " + std::to_string(timer.getDurationNano()) + + " nanoseconds\n" + "Max RSS: " + std::to_string(max_rss) + " KB\n" + + "Cache Misses: " + std::to_string(cacheMisses) + "\n" + + "Cache References: " + std::to_string(cacheReferences) + "\n" + + "Instructions: " + std::to_string(instructions) + "\n" + + "Minor Page Faults: " + std::to_string(minorPageFaults) + "\n" + + "Major Page Faults: " + std::to_string(majorPageFaults) + "\n"; + if (isDist) { + std::ofstream fp(fileName, std::ios::out); + fp << output; + fp.close(); + } else { + std::cout << output; + } } }; #define BENCHMARK_SCOPE(name) ScopeBenchmarker benchmarker##__LINE__(name) +#define BENCHMARK_SCOPE_FILE(name, file_name) \ + ScopeBenchmarker benchmarker##__LINE__(name, file_name) diff --git a/microbench/edit_scalability.cpp b/microbench/edit_scalability.cpp index 47e6c38..9416a95 100644 --- a/microbench/edit_scalability.cpp +++ b/microbench/edit_scalability.cpp @@ -14,6 +14,8 @@ #include "scea/algo/bfs.hpp" #include "scea/algo/nop.hpp" #include "scea/algo/tc.hpp" +#include "scea/algo/pr.hpp" +#include "scea/algo/bc.hpp" #include "scea/graph/lscsr.hpp" #include "scea/graph/morph.hpp" #include "scea/graph/adj.hpp" @@ -21,7 +23,7 @@ #include "scea/stats.hpp" enum GraphType { lscsr, morph, adj, lccsr }; -enum AlgoName { nop, sssp_bfs, tc }; +enum AlgoName { nop, sssp_bfs, tc, pr, bc }; 
std::istream& operator>>(std::istream& in, GraphType& type) { std::string name; @@ -53,6 +55,10 @@ std::istream& operator>>(std::istream& in, AlgoName& name) { name = sssp_bfs; } else if (type == "tc") { name = tc; + } else if (type == "pr") { + name = pr; + } else if (type == "bc") { + name = bc; } else { // Handle invalid input (throw exception, print error message, etc.) in.setstate(std::ios_base::failbit); @@ -151,6 +157,14 @@ int main(int argc, char const* argv[]) { algo = std::make_unique(); break; } + case AlgoName::pr: { + algo = std::make_unique(); + break; + } + case AlgoName::bc: { + algo = std::make_unique(); + break; + } default: throw std::runtime_error("unknown algorithm"); } diff --git a/pagerank/CMakeLists.txt b/pagerank/CMakeLists.txt new file mode 100644 index 0000000..959266f --- /dev/null +++ b/pagerank/CMakeLists.txt @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +set(sources + pagerank.cpp +) + +add_executable(pagerank) +target_sources(pagerank PRIVATE ${sources}) +target_include_directories(pagerank PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include ${PHMAP_DIRECTORY} ${PCG_DIRECTORY} ${graph-log-sketch_SOURCE_DIR}/include) +target_link_libraries(pagerank PRIVATE Galois::cusp Galois::dist_async Galois::gluon Galois::wmd Boost::program_options Scea::lib) diff --git a/pagerank/pagerank.cpp b/pagerank/pagerank.cpp new file mode 100644 index 0000000..ed24c9e --- /dev/null +++ b/pagerank/pagerank.cpp @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: BSD-2-Clause +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +#include +#include +#include "importer.hpp" +#include "galois/graphs/DistributedLocalGraph.h" +#include "galois/graphs/GluonSubstrate.h" +#include "galois/wmd/WMDPartitioner.h" +#include "galois/graphs/GenericPartitioners.h" +#include "galois/DTerminationDetector.h" +#include "galois/DistGalois.h" +#include "galois/DReducible.h" +#include "galois/gstl.h" +#include "galois/runtime/SyncStructures.h" +#include "galois/runtime/Tracer.h" +#include "galois/runtime/GraphUpdateManager.h" +#include + +namespace po = boost::program_options; + +static const float alpha = (1.0 - 0.85); +static const float tolerance = 1e-5; +struct NodeData { + float value; + std::atomic nout; + float delta; + std::atomic residual; + NodeData() : value(0), nout(0), delta(0), residual(0) {} + explicit NodeData(float v) : value(v), nout(0), delta(0), residual(0) {} + + // Copy constructor + NodeData(const NodeData& other) { + value = other.value; + nout = other.nout.load(); + delta = other.delta; + residual = other.residual.load(); + } +}; + +uint64_t maxIterations = 1000; + +galois::DynamicBitSet bitset_residual; +galois::DynamicBitSet bitset_nout; + +typedef galois::graphs::DistLocalGraph Graph; +typedef galois::graphs::WMDGraph< + galois::graphs::ELVertex, galois::graphs::ELEdge, NodeData, void, OECPolicy> + ELGraph; +typedef typename Graph::GraphNode GNode; +typedef GNode WorkItem; + +std::unique_ptr> syncSubstrate; + +#include "pagerank_push_sync.hh" + +struct ResetGraph { + Graph* graph; + + explicit ResetGraph(Graph* _graph) : graph(_graph) {} + static void go(Graph& _graph) { + const auto& allNodes = _graph.allNodesRange(); + galois::do_all( + galois::iterate(allNodes.begin(), allNodes.end()), ResetGraph{&_graph}, + galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("ResetGraph").c_str())); + } + + void operator()(GNode src) const { + NodeData& sdata = graph->getData(src); + sdata.value = 0; + sdata.nout = 0; + sdata.residual = 0; + sdata.delta = 0; + } +}; + 
+// Initialize residual at nodes with outgoing edges + find nout for +// nodes with outgoing edges +struct InitializeGraph { + const float& local_alpha; + Graph* graph; + + InitializeGraph(const float& _alpha, Graph* _graph) + : local_alpha(_alpha), graph(_graph) {} + + static void go(Graph& _graph) { + // first initialize all fields to 0 via ResetGraph (can't assume all zero + // at start) + ResetGraph::go(_graph); + + const auto& nodesWithEdges = _graph.allNodesRange(); + + // regular do all without stealing; just initialization of nodes with + // outgoing edges + galois::do_all( + galois::iterate(nodesWithEdges.begin(), nodesWithEdges.end()), + InitializeGraph{alpha, &_graph}, galois::steal(), galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("InitializeGraph").c_str())); + + syncSubstrate->sync( + "InitializeGraphNout"); + } + + void operator()(GNode src) const { + NodeData& sdata = graph->getData(src); + sdata.residual = local_alpha; + uint32_t num_edges = + std::distance(graph->edge_begin(src), graph->edge_end(src)); + galois::atomicAdd(sdata.nout, num_edges); + bitset_nout.set(src); + } +}; + +struct PageRank_delta { + const float& local_alpha; + const float& local_tolerance; + Graph* graph; + + PageRank_delta(const float& _local_alpha, const float& _local_tolerance, + Graph* _graph) + : local_alpha(_local_alpha), local_tolerance(_local_tolerance), + graph(_graph) {} + + static void go(Graph& _graph) { + const auto& nodesWithEdges = _graph.allNodesRange(); + + galois::do_all( + galois::iterate(nodesWithEdges.begin(), nodesWithEdges.end()), + PageRank_delta{alpha, tolerance, &_graph}, galois::no_stats(), + galois::loopname( + syncSubstrate->get_run_identifier("PageRank_delta").c_str())); + } + + void operator()(WorkItem src) const { + NodeData& sdata = graph->getData(src); + + if (sdata.residual > 0) { + float residual_old = sdata.residual; + sdata.residual = 0; + sdata.value += residual_old; + if (residual_old > 
this->local_tolerance) { + if (sdata.nout > 0) { + sdata.delta = residual_old * (1 - local_alpha) / sdata.nout; + } + } + } + } +}; + +template +struct PageRank { + Graph* graph; + using DGTerminatorDetector = + typename std::conditional, + galois::DGAccumulator>::type; + + DGTerminatorDetector& active_vertices; + + PageRank(Graph* _g, DGTerminatorDetector& _dga) + : graph(_g), active_vertices(_dga) {} + + static void go(Graph& _graph) { + unsigned _num_iterations = 0; + const auto& nodesWithEdges = _graph.allNodesRange(); + DGTerminatorDetector dga; + + do { + syncSubstrate->set_num_round(_num_iterations); + PageRank_delta::go(_graph); + dga.reset(); + // reset residual on mirrors + syncSubstrate->reset_mirrorField(); + + galois::do_all( + galois::iterate(nodesWithEdges), PageRank{&_graph, dga}, + galois::no_stats(), galois::steal(), + galois::loopname( + syncSubstrate->get_run_identifier("PageRank").c_str())); + + syncSubstrate->sync("PageRank"); + + galois::runtime::reportStat_Tsum( + "PAGERANK", "NumWorkItems_" + (syncSubstrate->get_run_identifier()), + (uint64_t)dga.read_local()); + + ++_num_iterations; + } while ((async || (_num_iterations < maxIterations)) && + dga.reduce(syncSubstrate->get_run_identifier())); + + if (galois::runtime::getSystemNetworkInterface().ID == 0) { + galois::runtime::reportStat_Single( + "PAGERANK", + "NumIterations_" + std::to_string(syncSubstrate->get_run_num()), + (uint64_t)_num_iterations); + } + } + + void operator()(WorkItem src) const { + NodeData& sdata = graph->getData(src); + if (sdata.delta > 0) { + float _delta = sdata.delta; + sdata.delta = 0; + + active_vertices += 1; // this should be moved to Pagerank_delta operator + + for (auto nbr : graph->edges(src)) { + GNode dst = graph->getEdgeDst(nbr); + NodeData& ddata = graph->getData(dst); + + galois::atomicAdd(ddata.residual, _delta); + + bitset_residual.set(dst); + } + } + } +}; + +/******************************************************************************/ +/* Sanity 
check operators */ +/******************************************************************************/ + +// Gets various values from the pageranks values/residuals of the graph +struct PageRankSanity { + const float& local_tolerance; + Graph* graph; + + galois::DGAccumulator& DGAccumulator_sum; + galois::DGAccumulator& DGAccumulator_sum_residual; + galois::DGAccumulator& DGAccumulator_residual_over_tolerance; + + galois::DGReduceMax& max_value; + galois::DGReduceMin& min_value; + galois::DGReduceMax& max_residual; + galois::DGReduceMin& min_residual; + + PageRankSanity( + const float& _local_tolerance, Graph* _graph, + galois::DGAccumulator& _DGAccumulator_sum, + galois::DGAccumulator& _DGAccumulator_sum_residual, + galois::DGAccumulator& _DGAccumulator_residual_over_tolerance, + galois::DGReduceMax& _max_value, + galois::DGReduceMin& _min_value, + galois::DGReduceMax& _max_residual, + galois::DGReduceMin& _min_residual) + : local_tolerance(_local_tolerance), graph(_graph), + DGAccumulator_sum(_DGAccumulator_sum), + DGAccumulator_sum_residual(_DGAccumulator_sum_residual), + DGAccumulator_residual_over_tolerance( + _DGAccumulator_residual_over_tolerance), + max_value(_max_value), min_value(_min_value), + max_residual(_max_residual), min_residual(_min_residual) {} + + static void go(Graph& _graph, galois::DGAccumulator& DGA_sum, + galois::DGAccumulator& DGA_sum_residual, + galois::DGAccumulator& DGA_residual_over_tolerance, + galois::DGReduceMax& max_value, + galois::DGReduceMin& min_value, + galois::DGReduceMax& max_residual, + galois::DGReduceMin& min_residual) { + DGA_sum.reset(); + DGA_sum_residual.reset(); + max_value.reset(); + max_residual.reset(); + min_value.reset(); + min_residual.reset(); + DGA_residual_over_tolerance.reset(); + + galois::do_all(galois::iterate(_graph.masterNodesRange().begin(), + _graph.masterNodesRange().end()), + PageRankSanity(tolerance, &_graph, DGA_sum, DGA_sum_residual, + DGA_residual_over_tolerance, max_value, + min_value, 
max_residual, min_residual), + galois::no_stats(), galois::loopname("PageRankSanity")); + + float max_rank = max_value.reduce(); + float min_rank = min_value.reduce(); + float rank_sum = DGA_sum.reduce(); + float residual_sum = DGA_sum_residual.reduce(); + uint64_t over_tolerance = DGA_residual_over_tolerance.reduce(); + float max_res = max_residual.reduce(); + float min_res = min_residual.reduce(); + + // Only node 0 will print data + if (galois::runtime::getSystemNetworkInterface().ID == 0) { + galois::gPrint("Max rank is ", max_rank, "\n"); + galois::gPrint("Min rank is ", min_rank, "\n"); + galois::gPrint("Rank sum is ", rank_sum, "\n"); + galois::gPrint("Residual sum is ", residual_sum, "\n"); + galois::gPrint("# nodes with residual over ", tolerance, + " (tolerance) is ", over_tolerance, "\n"); + galois::gPrint("Max residual is ", max_res, "\n"); + galois::gPrint("Min residual is ", min_res, "\n"); + } + } + + /* Gets the max, min rank from all owned nodes and + * also the sum of ranks */ + void operator()(GNode src) const { + NodeData& sdata = graph->getData(src); + + max_value.update(sdata.value); + min_value.update(sdata.value); + max_residual.update(sdata.residual); + min_residual.update(sdata.residual); + + DGAccumulator_sum += sdata.value; + DGAccumulator_sum_residual += sdata.residual; + + if (sdata.residual > local_tolerance) { + DGAccumulator_residual_over_tolerance += 1; + } + } +}; + +const char* elGetOne(const char* line, std::uint64_t& val) { + bool found = false; + val = 0; + char c; + while ((c = *line++) != '\0' && isspace(c)) { + } + do { + if (isdigit(c)) { + found = true; + val *= 10; + val += (c - '0'); + } else if (c == '_') { + continue; + } else { + break; + } + } while ((c = *line++) != '\0' && !isspace(c)); + if (!found) + val = UINT64_MAX; + return line; +} + +void parser(const char* line, Graph& hg, + std::vector>& delta_mirrors, + std::unordered_set& mirrors) { + uint64_t src, dst; + line = elGetOne(line, src); + line = 
elGetOne(line, dst); + if ((hg.isOwned(src)) && (!hg.isLocal(dst))) { + uint32_t h = hg.getHostID(dst); + if (mirrors.find(dst) == mirrors.end()) { + mirrors.insert(dst); + delta_mirrors[h].push_back(dst); + } + } +} + +std::vector> +genMirrorNodes(Graph& hg, std::string filename, int batch) { + auto& net = galois::runtime::getSystemNetworkInterface(); + std::vector> delta_mirrors(net.Num); + std::unordered_set mirrors; + + for (uint32_t i = 0; i < net.Num; i++) { + std::string dynamicFile = filename + "_batch" + std::to_string(batch) + + "_host" + std::to_string(i) + ".el"; + std::ifstream file(dynamicFile); + std::string line; + while (std::getline(file, line)) { + parser(line.c_str(), hg, delta_mirrors, mirrors); + } + } + return delta_mirrors; +} + +void PrintMasterMirrorNodes(Graph& hg, uint64_t id) { + std::cout << "Master nodes on host " << id << std::endl; + for (auto node : hg.masterNodesRange()) { + std::cout << hg.getGID(node) << " "; + } + std::cout << std::endl; + std::cout << "Mirror nodes on host " << id << std::endl; + auto mirrors = hg.getMirrorNodes(); + for (auto vec : mirrors) { + for (auto node : vec) { + std::cout << node << " "; + } + } + std::cout << std::endl; +} + +int main(int argc, char* argv[]) { + std::string filename; + uint64_t numVertices; + uint64_t num_batches; + po::options_description desc("Allowed options"); + desc.add_options()("help", "print help info")( + "staticFile", po::value(&filename)->required(), + "Input file for initial static graph")( + "numVertices", po::value(&numVertices)->required(), + "Number of total vertices")("numBatches", + po::value(&num_batches)->required(), + "Number of Batches"); + + po::variables_map vm; + try { + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + galois::DistMemSys G; + + std::unique_ptr hg; + + hg = distLocalGraphInitialization(filename, numVertices); + 
syncSubstrate = gluonInitialization(hg); + + if (hg == nullptr || syncSubstrate == nullptr) { + std::cerr << "Initialization failed."; + return 1; + } + + galois::runtime::getHostBarrier().wait(); + + ELGraph* wg = dynamic_cast(hg.get()); + + auto& net = galois::runtime::getSystemNetworkInterface(); + for (int i = 0; i < num_batches; i++) { + std::vector edit_files; + std::string dynFile = "edits"; + std::string dynamicFile = dynFile + "_batch" + std::to_string(i) + "_host" + + std::to_string(net.ID) + ".el"; + edit_files.emplace_back(dynamicFile); + // IMPORTANT: CAll genMirrorNodes before creating the + // graphUpdateManager!!!!!!!! + std::vector> delta_mirrors = + genMirrorNodes(*hg, dynFile, i); + galois::runtime::getHostBarrier().wait(); + graphUpdateManager + GUM(std::make_unique>( + 1, edit_files), + 100, wg); + GUM.update(); + galois::runtime::getHostBarrier().wait(); + + syncSubstrate->addDeltaMirrors(delta_mirrors); + galois::runtime::getHostBarrier().wait(); + delta_mirrors.clear(); + + bitset_residual.resize(hg->size()); + bitset_nout.resize(hg->size()); + + InitializeGraph::go((*hg)); + galois::runtime::getHostBarrier().wait(); + + galois::DGAccumulator DGA_sum; + galois::DGAccumulator DGA_sum_residual; + galois::DGAccumulator DGA_residual_over_tolerance; + galois::DGReduceMax max_value; + galois::DGReduceMin min_value; + galois::DGReduceMax max_residual; + galois::DGReduceMin min_residual; + + PageRank::go(*hg); + + // sanity check + PageRankSanity::go(*hg, DGA_sum, DGA_sum_residual, + DGA_residual_over_tolerance, max_value, min_value, + max_residual, min_residual); + + if ((i + 1) != num_batches) { + bitset_residual.reset(); + bitset_nout.reset(); + + (*syncSubstrate).set_num_run(i + 1); + galois::runtime::getHostBarrier().wait(); + } + } + + return 0; +} diff --git a/pagerank/pagerank_push_sync.hh b/pagerank/pagerank_push_sync.hh new file mode 100644 index 0000000..14aa1d4 --- /dev/null +++ b/pagerank/pagerank_push_sync.hh @@ -0,0 +1,31 @@ +/* + * 
This file belongs to the Galois project, a C++ library for exploiting + * parallelism. The code is being released under the terms of the 3-Clause BSD + * License (a copy is located in LICENSE.txt at the top-level directory). + * + * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. + * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS + * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF + * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF + * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH + * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances + * shall University be liable for incidental, special, indirect, direct or + * consequential damages or loss of profits, interruption of business, or + * related expenses which may arise from use of Software or Documentation, + * including but not limited to those resulting from defects in Software and/or + * Documentation, or loss or inaccuracy of data of any kind. + */ +#ifndef GRAPH_LOG_SKETCH_PAGERANK_PAGERANK_PUSH_SYNC_HH_ +#define GRAPH_LOG_SKETCH_PAGERANK_PAGERANK_PUSH_SYNC_HH_ + +#include "galois/runtime/SyncStructures.h" + +GALOIS_SYNC_STRUCTURE_REDUCE_ADD(nout, unsigned int); +GALOIS_SYNC_STRUCTURE_BITSET(nout); + +GALOIS_SYNC_STRUCTURE_REDUCE_SET(residual, float); +GALOIS_SYNC_STRUCTURE_REDUCE_ADD(residual, float); +GALOIS_SYNC_STRUCTURE_BITSET(residual); + +#endif // GRAPH_LOG_SKETCH_PAGERANK_PAGERANK_PUSH_SYNC_HH_ diff --git a/scripts/create_batches_for_multiple_hosts.py b/scripts/create_batches_for_multiple_hosts.py new file mode 100644 index 0000000..8bd4eb3 --- /dev/null +++ b/scripts/create_batches_for_multiple_hosts.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +import argparse +import random + +def distribute_edges(file_path, out_path, num_batches, num_hosts): + with open(file_path, 'r') as file: + edges = file.readlines() + + random.shuffle(edges) + + total_parts = num_batches * num_hosts + total_edges = len(edges) + part_size = total_edges // total_parts + + remainder = total_edges % total_parts + + current_edge = 0 + max_batch_size = 0 + for i in range(num_batches): + for j in range(num_hosts): + this_part_size = part_size + (1 if remainder > 0 else 0) + remainder -= 1 + + part_edges = edges[current_edge:current_edge + this_part_size] + current_edge += this_part_size + + if len(part_edges) > max_batch_size: + max_batch_size = len(part_edges) + + filename = f"{out_path}/edits_batch{i}_host{j}.el" + with open(filename, 'w') as outfile: + outfile.writelines(part_edges) + + print(f"Maximum batch size: {max_batch_size}") + +def main(): + parser = argparse.ArgumentParser(description="Distribute edge list into batches for hosts and print max batch size.") + parser.add_argument('file_path', type=str, help="The path to the edge list file.") + parser.add_argument('out_path', type=str, help="The path to the edge list file.") + parser.add_argument('num_batches', type=int, help="The number of batches.") + parser.add_argument('num_hosts', type=int, help="The number of hosts.") + + args = parser.parse_args() + + distribute_edges(args.file_path, args.out_path, args.num_batches, args.num_hosts) + +if __name__ == "__main__": + main() diff --git a/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_0.out b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_0.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git 
a/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_1.out b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_1.out new file mode 100644 index 0000000..87b11d5 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_1.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_2.out b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_2.out new file mode 100644 index 0000000..3980cf6 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/bfs-pull-0_2.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3342929 nanoseconds +Max RSS: 243780 KB +Cache Misses: 232 +Cache References: 55329 +Instructions: 12527067 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/adj/1/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/total_e2e.out new file mode 100644 index 0000000..796506a --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/1/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 5317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/adj/2/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/adj/2/total_e2e.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/2/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/adj/4/total_e2e.out 
b/scripts/dist-exps/e2e_bfs-pull_test/adj/4/total_e2e.out new file mode 100644 index 0000000..77495ab --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/adj/4/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 817062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_0.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_0.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_1.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_1.out new file mode 100644 index 0000000..87b11d5 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_1.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_2.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_2.out new file mode 100644 index 0000000..3980cf6 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/bfs-pull-0_2.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3342929 nanoseconds +Max RSS: 243780 KB +Cache Misses: 232 +Cache References: 55329 +Instructions: 12527067 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/total_e2e.out new file mode 100644 
index 0000000..8974a76 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/1/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 4317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/2/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/2/total_e2e.out new file mode 100644 index 0000000..3da0158 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/2/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 2317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lccsr/4/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/4/total_e2e.out new file mode 100644 index 0000000..8d80072 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lccsr/4/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 717062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_0.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_0.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_1.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_1.out new file mode 100644 index 0000000..87b11d5 --- /dev/null +++ 
b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_1.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_2.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_2.out new file mode 100644 index 0000000..3980cf6 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/bfs-pull-0_2.out @@ -0,0 +1,8 @@ +Benchmark results for bfs-pull: +Duration: 3342929 nanoseconds +Max RSS: 243780 KB +Cache Misses: 232 +Cache References: 55329 +Instructions: 12527067 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/total_e2e.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/1/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/2/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/2/total_e2e.out new file mode 100644 index 0000000..1f23c07 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/2/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/e2e_bfs-pull_test/lscsr/4/total_e2e.out b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/4/total_e2e.out new file mode 100644 index 0000000..23af520 --- /dev/null +++ b/scripts/dist-exps/e2e_bfs-pull_test/lscsr/4/total_e2e.out @@ -0,0 +1,8 @@ +Benchmark 
bfs-pull algorithm results for batch 0: +Duration: 617062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ls6_graphtype_test_scripts/batch_ls6_all.sh b/scripts/dist-exps/ls6_graphtype_test_scripts/batch_ls6_all.sh new file mode 100755 index 0000000..603bfb6 --- /dev/null +++ b/scripts/dist-exps/ls6_graphtype_test_scripts/batch_ls6_all.sh @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +EXECS=("bfs") + +# Format: list("#-of-hosts,time") + +#SET="4,03:00:00 8,03:00:00 16,03:00:00 32,03:00:00" +#SET="16,00:30:00" +SET="1,00:10:00" +#SET="4,02:30:00 8,02:30:00 16,02:30:00" +#SET="4,00:40:00 8,00:40:00 16,00:40:00 32,00:40:00" +#SET="16,05:00:00" +#SET=" 16,02:00:00 32,02:00:00" +#SET="4,01:00:00 8,01:00:00" +#SET="4,01:00:00 8,01:00:00 16,01:00:00 32,01:00:00 64,01:00:00" +#SET="4,01:00:00 32,01:00:00" +#SET="8,01:00:00" +#SET="32,02:00:00" +#SET="32,01:00:00" +#SET="16,01:10:00 32,01:10:00" + +# Format: (input-graph;\"${SET}\"") + +INPUTS=("chain.el;\"${SET}\"" "chain2.el;\"${SET}\"") + +QUEUE=normal + +GRAPH_TYPES=("lscsr" "adj") + +for j in "${INPUTS[@]}"; do + IFS=";" + set $j + for i in "${EXECS[@]}"; do + for g in "${GRAPH_TYPES[@]}"; do + echo "./run_stampede_all.sh ${i} ${1} ${2} $QUEUE $a $g" + ./run_ls6_all.sh ${i} ${1} ${2} $QUEUE $a $g |& tee -a jobs + done + done +done diff --git a/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.sbatch b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.sbatch new file mode 100755 index 0000000..bbefc93 --- /dev/null +++ b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.sbatch @@ -0,0 +1,56 @@ +#!/bin/bash +#SBATCH -J bfs_chain2.el_1 +#SBATCH -o bfs_chain2.el_1_%j.out +#SBATCH -N 1 -n 1 +#SBATCH -p normal +#SBATCH -t 00:10:00 +#SBATCH --mail-user=hochan@utexas.edu +#SBATCH --mail-type=begin +#SBATCH 
--mail-type=fail +#SBATCH --mail-type=end +#SBATCH -A ASC23036 + +EXEC_NAME=${1} +INPUT=${2} +NUM_HOSTS=${3} +THREADS=${4} +GRAPH_TYPE=${5} + +execdir=$(pwd) +EXEC=${execdir}/bin/${EXEC_NAME} +OUTPUT=${execdir}/${EXEC_NAME} + +# Create directory structures +E2E_OUT_DIR="e2e_${EXEC_NAME}_${INPUT}/${GRAPH_TYPE}/${NUM_HOSTS}" +mkdir -p $E2E_OUT_DIR + +outname=${EXEC_NAME}_${INPUT}_${NUM_HOSTS}_${SLURM_JOB_ID}.out +basename=${EXEC_NAME}_${INPUT}_${SLURM_NNODES}_${SLURM_JOB_ID} +statname=outputs/${basename}.stats + +RUN=mpirun +mpirun --version + +set -x #echo on +echo $RUN --bind-to none -np $NUM_HOSTS $EXEC $INPUT 0 10 +$RUN --bind-to none -np $NUM_HOSTS $EXEC $INPUT 0 10 + +mv total_e2e_*.out $E2E_OUT_DIR + +set +x #echo off + +# give permissions to output files +#chmod 664 ${outname} +#chmod 664 $statname + +succeed=$(ls ${statname}.* 2>/dev/null | wc -l) +if [[ "$succeed" != "0" ]]; then + mv ${outname} outputs +else + mv ${outname} fails +fi + +echo "Algorithm: " $EXEC_NAME +echo "Input: " $INPUT +echo "Number of nodes: " $SLURM_NNODES +comment diff --git a/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.template.sbatch b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.template.sbatch new file mode 100755 index 0000000..a8b434b --- /dev/null +++ b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6.template.sbatch @@ -0,0 +1,39 @@ +#!/bin/bash +#SBATCH --mail-user=hochan@utexas.edu +#SBATCH --mail-type=begin +#SBATCH --mail-type=fail +#SBATCH --mail-type=end +#SBATCH -A ASC23036 + +EXEC_NAME=${1} +INPUT=${2} +NUM_HOSTS=${3} +THREADS=${4} +GRAPH_TYPE=${5} + +execdir=$(pwd) +EXEC=${execdir}/bin/${EXEC_NAME} +OUTPUT=${execdir}/${EXEC_NAME} + +# Create directory structures +E2E_OUT_DIR="e2e_${EXEC_NAME}_${INPUT}/${GRAPH_TYPE}/${NUM_HOSTS}" +mkdir -p $E2E_OUT_DIR + +outname=${EXEC_NAME}_${INPUT}_${NUM_HOSTS}_${SLURM_JOB_ID}.out +basename=${EXEC_NAME}_${INPUT}_${SLURM_NNODES}_${SLURM_JOB_ID} +statname=outputs/${basename}.stats + +RUN=mpirun +mpirun --version 
+ +set -x #echo on +echo $RUN --bind-to none -np $NUM_HOSTS $EXEC $INPUT 0 10 +$RUN --bind-to none -np $NUM_HOSTS $EXEC $INPUT 0 10 + +mv total_e2e_*.out $E2E_OUT_DIR + +set +x #echo off + +echo "Algorithm: " $EXEC_NAME +echo "Input: " $INPUT +echo "Number of nodes: " $SLURM_NNODES diff --git a/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6_all.sh b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6_all.sh new file mode 100755 index 0000000..8655a07 --- /dev/null +++ b/scripts/dist-exps/ls6_graphtype_test_scripts/run_ls6_all.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +EXEC=$1 +INPUT=$2 +SET=$3 # hosts + time +QUEUE=$4 +GRAPH_TYPE=$5 + +echo EXEC:" $EXEC " INPUT:" $INPUT " SET:"$SET " QUEUE:"$QUEUE" GRAPH_TYPE:" $GRAPH_TYPE + +# Remove " from the tail +SET="${SET%\"}" +# Remove " from the head +SET="${SET#\"}" + +for task in $SET; do + IFS="," + set $task + cp run_ls6.template.sbatch run_ls6.sbatch + + sed -i "2i#SBATCH -t $2" run_ls6.sbatch + sed -i "2i#SBATCH -p $QUEUE" run_ls6.sbatch + sed -i "2i#SBATCH -N $1 -n $1" run_ls6.sbatch + #sed -i "2i#SBATCH --ntasks-per-node 1" run_ls6.sbatch + sed -i "2i#SBATCH -o ${EXEC}_${INPUT}_${1}_%j.out" run_ls6.sbatch + sed -i "2i#SBATCH -J ${EXEC}_${INPUT}_${1}" run_ls6.sbatch + threads=48 + + if [[ $QUEUE == "normal" || $QUEUE == "development" || $QUEUE == "large" || $QUEUE == "long" ]]; then + threads=272 + fi + + echo "CPU-only " $EXEC $INPUT $1 $threads ${EXEC}_${INPUT}_${1}_${GRAPH_TYPE} + sbatch run_ls6.sbatch $EXEC $INPUT $1 $threads ${GRAPH_TYPE} + # source run_ls6.sbatch $EXEC $INPUT $PART $1 $threads ${GRAPH_TYPE} + #rm run_ls6.sbatch +done diff --git a/scripts/dist-exps/ls6_ppolicy_test_scripts/batch_ls6_all.sh b/scripts/dist-exps/ls6_ppolicy_test_scripts/batch_ls6_all.sh new file mode 100755 index 0000000..973b82a --- /dev/null +++ b/scripts/dist-exps/ls6_ppolicy_test_scripts/batch_ls6_all.sh @@ -0,0 
+1,53 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +EXECS=("connected-components-cli-dist") + +# Format: list("#-of-hosts,time") + +#SET="4,03:00:00 8,03:00:00 16,03:00:00 32,03:00:00" +#SET="16,00:30:00" +SET="2,01:30:00" +#SET="4,02:30:00 8,02:30:00 16,02:30:00" +#SET="4,00:40:00 8,00:40:00 16,00:40:00 32,00:40:00" +#SET="16,05:00:00" +#SET=" 16,02:00:00 32,02:00:00" +#SET="4,01:00:00 8,01:00:00" +#SET="4,01:00:00 8,01:00:00 16,01:00:00 32,01:00:00 64,01:00:00" +#SET="4,01:00:00 32,01:00:00" +#SET="8,01:00:00" +#SET="32,02:00:00" +#SET="32,01:00:00" +#SET="16,01:10:00 32,01:10:00" + +# Format: (input-graph;\"${SET}\"") + +INPUTS=("twitter40;\"${SET}\"") + +QUEUE=skx-normal +#QUEUE=skx-large +#QUEUE=skx-dev +#QUEUE=normal +#QUEUE=development#PARTS=( "oec" "cvc" ) +#PARTS=( "oec" "cvc" ) +#PARTS=( "oec" "cvc" ) +#PARTS=( "oec" ) +#PARTS=( "cvc" ) + +#PARTS=( "oec" "cvc" ) +#PARTS=( "blocked-oec" ) +PARTS=("oec" "divija" "cvc") +#PARTS=( "random-oec" ) +#PARTS=( "random-oec" ) +#PARTS=( "random-cvc" ) + +for j in "${INPUTS[@]}"; do + IFS=";" + set $j + for i in "${EXECS[@]}"; do + for p in "${PARTS[@]}"; do + echo "./run_stampede_all.sh ${i} ${1} ${2} $QUEUE $p $a" + ./run_ls6_all.sh ${i} ${1} ${2} $QUEUE $p $a |& tee -a jobs + done + done +done diff --git a/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.sbatch b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.sbatch new file mode 100755 index 0000000..2e77cb7 --- /dev/null +++ b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.sbatch @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH -J connected-components-cli-dist_twitter40_cvc_2 +#SBATCH -o connected-components-cli-dist_twitter40_cvc_2_%j.out +#SBATCH -N 2 -n 2 +#SBATCH -p skx-normal +#SBATCH -t 01:30:00 +#SBATCH --mail-user=hochan@utexas.edu +#SBATCH --mail-type=begin +#SBATCH --mail-type=fail +#SBATCH --mail-type=end + +EXEC_NAME=${1} +INPUT=${2} +PART=${3} +NUM_HOSTS=${4} +THREADS=${5} + 
+execdir=$(pwd) +EXEC=${execdir}/binaries/${EXEC_NAME} +OUTPUT=${execdir}/${EXEC_NAME} + +# Create directory structures +PPOLICY_OUT_DIR="${EXEC_NAME}_${INPUT}/${PART}/${NUM_HOSTS}" +mkdir -p $PPOLICY_OUT_DIR + +outname=${EXEC_NAME}_${INPUT}_${PART}_${NUM_HOSTS}_${SLURM_JOB_ID}.out +basename=${EXEC_NAME}_${INPUT}_${PART}_${SLURM_NNODES}_${SLURM_JOB_ID} +statname=outputs/${basename}.stats + +</dev/null | wc -l) +if [[ "$succeed" != "0" ]]; then + mv ${outname} outputs +else + mv ${outname} fails +fi + +echo "Algorithm: " $EXEC_NAME +echo "Input: " $INPUT +echo "Number of nodes: " $SLURM_NNODES +comment diff --git a/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.template.sbatch b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.template.sbatch new file mode 100755 index 0000000..a8d90e6 --- /dev/null +++ b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6.template.sbatch @@ -0,0 +1,23 @@ +#!/bin/bash +#SBATCH --mail-user=hochan@utexas.edu +#SBATCH --mail-type=begin +#SBATCH --mail-type=fail +#SBATCH --mail-type=end + +EXEC_NAME=${1} +INPUT=${2} +PART=${3} +NUM_HOSTS=${4} +THREADS=${5} + +execdir=$(pwd) +EXEC=${execdir}/binaries/${EXEC_NAME} +OUTPUT=${execdir}/${EXEC_NAME} + +# Create directory structures +PPOLICY_OUT_DIR="${EXEC_NAME}_${INPUT}/${PART}/${NUM_HOSTS}" +mkdir -p $PPOLICY_OUT_DIR + +outname=${EXEC_NAME}_${INPUT}_${PART}_${NUM_HOSTS}_${SLURM_JOB_ID}.out +basename=${EXEC_NAME}_${INPUT}_${PART}_${SLURM_NNODES}_${SLURM_JOB_ID} +statname=outputs/${basename}.stats diff --git a/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6_all.sh b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6_all.sh new file mode 100755 index 0000000..b8b5f59 --- /dev/null +++ b/scripts/dist-exps/ls6_ppolicy_test_scripts/run_ls6_all.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +EXEC=$1 +INPUT=$2 +SET=$3 # hosts + time +QUEUE=$4 +PART=$5 + +echo EXEC:" $EXEC " INPUT:" $INPUT " SET:"$SET " QUEUE:"$QUEUE " PART:"$PART + +# Remove " from the tail +SET="${SET%\"}" +# Remove " from the head +SET="${SET#\"}" + +for task in $SET; do + IFS="," + set $task + cp run_ls6.template.sbatch run_ls6.sbatch + + sed -i "2i#SBATCH -t $2" run_ls6.sbatch + sed -i "2i#SBATCH -p $QUEUE" run_ls6.sbatch + sed -i "2i#SBATCH -N $1 -n $1" run_ls6.sbatch + #sed -i "2i#SBATCH --ntasks-per-node 1" run_ls6.sbatch + sed -i "2i#SBATCH -o ${EXEC}_${INPUT}_${PART}_${1}_%j.out" run_ls6.sbatch + sed -i "2i#SBATCH -J ${EXEC}_${INPUT}_${PART}_${1}" run_ls6.sbatch + threads=48 + + if [[ $QUEUE == "normal" || $QUEUE == "development" || $QUEUE == "large" || $QUEUE == "long" ]]; then + threads=272 + fi + + echo "CPU-only " $EXEC $INPUT $PART $1 $threads ${EXEC}_${INPUT}_${PART}_${1} + # sbatch run_ls6.sbatch $EXEC $INPUT $PART $1 $threads ${GRAPH_TYPE} + source run_ls6.sbatch $EXEC $INPUT $PART $1 $threads + #rm run_ls6.sbatch +done diff --git a/scripts/dist-exps/perhost_partitioning_csv.py b/scripts/dist-exps/perhost_partitioning_csv.py new file mode 100644 index 0000000..9ad4e52 --- /dev/null +++ b/scripts/dist-exps/perhost_partitioning_csv.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
+ +import subprocess +import re +import matplotlib.pyplot as plt +import numpy as np +import csv +import argparse +import os + + +def parse(log_path: str, algo: str): + print(f"Export {log_path} on {algo} to csv..") + command = f"cat {log_path}" + + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + stdout, stderr = process.communicate() + + output = stdout.decode() + + # Parse batch number and duration + ingestion_pattern = re.compile(rf"Benchmark {algo} ingestion results for batch (\d+):.*?Duration: (\d+) nanoseconds", re.DOTALL) + algorithm_pattern = re.compile(rf"Benchmark {algo} algorithm results for batch (\d+):.*?Duration: (\d+) nanoseconds", re.DOTALL) + + ingestion_durations = {} + algorithm_durations = {} + + ingestion_matches = re.findall(ingestion_pattern, output) + algorithm_matches = re.findall(algorithm_pattern, output) + + for batch, duration in ingestion_matches: + ingestion_durations[int(batch)] = int(duration) / 1000000 + + for batch, duration in algorithm_matches: + algorithm_durations[int(batch)] = int(duration) / 1000000 + + print(">> ingestion_durations:", ingestion_durations) + print(">> algorithm_durations:", algorithm_durations) + + return ingestion_durations, algorithm_durations + +def save_policy_to_csv(results, filepath): + with open(filepath, 'w', newline='') as csvfile: + fieldnames = ['Policy', 'Hosts', 'HostID', 'Ingestion_Duration', 'Algorithm_Duration'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + print("results:", results) + for policy in results.keys(): + for hosts in results[policy].keys(): + for host_id in range(int(hosts)): + data = results[policy][hosts][host_id] + ingestion_duration = data['ingestion'][0] if data['ingestion'] != {} else 0 + algorithm_duration = data['algorithm'][0] if data['algorithm'] != {} else 0 + print("ing:", ingestion_duration, " algo:", algorithm_duration) + writer.writerow({ + 'Policy': policy, + 'Hosts': hosts, + 
'HostID': str(host_id), + 'Ingestion_Duration': ingestion_duration, + 'Algorithm_Duration': algorithm_duration + }) + +def main(): + """ + This script expects the following directory structure. + + [app_name]_[input_graph] + |_ [partitioning_policy1] + | |_ [num_hosts0] + | | |_ [host_0].out + | | |_ [host_1].out + | | ... + | | |_ [host_[num_hosts0-1]].out + | | + | |_ [num_hosts1] + | + |_ [partitioning_policy2] + ... + """ + parser = argparse.ArgumentParser(description='Run and plot benchmark results based on command line flags.') + # parser.add_argument('--ingest', action='store_true', help='Plot ingestion durations.') + parser.add_argument('--output', type=str, help='Output file path to plot.', default=None) + + args = parser.parse_args() + + output_path = args.output + if not output_path: + print("No output file path provided. Use --output.") + return + + # The output_path's last directory name format is [algo]_[input_graph] + fname = os.path.basename(os.path.normpath(output_path)) + fname_split = fname.split('_') + assert fname_split[0] == "ppolicy", " Output directory's prefix should be 'ppolicy'" + algo_name = fname_split[1] + input_name = fname_split[2] + + print("algo name:", algo_name, "input name: ", input_name) + perhost_comp_results = {} + for ppolicy in os.listdir(output_path): + perhost_comp_results[ppolicy] = {} + for hosts in os.listdir(output_path+"/"+ppolicy): + + perhost_comp_results[ppolicy][hosts] = {} + + # We have interest on e2e execution time per-host or/and all execution. + # The e2e execution file names are total_e2e.out and host[host number]_e2e.out. + # We do not parse per-batch results. 
+ + for host_id in range(int(hosts)): + perhost_fname = f"{str(host_id)}.out" + log_path = output_path+"/"+ppolicy+"/"+hosts+"/"+perhost_fname + print("host_id:", host_id, " log path:", log_path) + ingestion_durations, algorithm_durations = parse(log_path, algo_name) + perhost_comp_results[ppolicy][hosts][host_id] = { + 'ingestion': ingestion_durations, + 'algorithm': algorithm_durations, + } + + print(perhost_comp_results) + save_policy_to_csv(perhost_comp_results, f"ppolicy_{algo_name}_{input_name}.csv") + + +if __name__ == "__main__": + main() diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/1/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/1/0.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/1/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/0.out new file mode 100644 index 0000000..3da0158 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 2317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/1.out new file mode 100644 index 0000000..f29a7a5 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/2/1.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 2297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git 
a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/0.out new file mode 100644 index 0000000..1f23c07 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/1.out new file mode 100644 index 0000000..f548f92 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/1.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/2.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/2.out new file mode 100644 index 0000000..f548f92 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/2.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/3.out b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/3.out new file mode 100644 index 0000000..f548f92 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/cvc/4/3.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1297970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/1/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/1/0.out new file mode 100644 
index 0000000..52d88a4 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/1/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 5417062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/0.out new file mode 100644 index 0000000..b2bd833 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 4417062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/1.out new file mode 100644 index 0000000..67285fc --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/2/1.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 4397970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/0.out new file mode 100644 index 0000000..989a2b7 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1417062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/1.out new file mode 100644 index 0000000..fdb53f4 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/1.out @@ 
-0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1397970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/2.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/2.out new file mode 100644 index 0000000..3a8733c --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/2.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1497970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/3.out b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/3.out new file mode 100644 index 0000000..706d17e --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/divija/4/3.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1997970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/1/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/1/0.out new file mode 100644 index 0000000..16f150e --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/1/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 3317062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/0.out new file mode 100644 index 0000000..91f040e --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 2017062 nanoseconds +Max RSS: 243780 KB +Cache 
Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/1.out new file mode 100644 index 0000000..eaead0b --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/2/1.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 2097970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/0.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/0.out new file mode 100644 index 0000000..a1f764b --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/0.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1017062 nanoseconds +Max RSS: 243780 KB +Cache Misses: 947 +Cache References: 57356 +Instructions: 12526376 +Minor Page Faults: 3 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/1.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/1.out new file mode 100644 index 0000000..c86fb47 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/1.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1097970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/2.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/2.out new file mode 100644 index 0000000..c86fb47 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/2.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1097970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git 
a/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/3.out b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/3.out new file mode 100644 index 0000000..c86fb47 --- /dev/null +++ b/scripts/dist-exps/ppolicy_bfs-pull_test/oec/4/3.out @@ -0,0 +1,8 @@ +Benchmark bfs-pull algorithm results for batch 0: +Duration: 1097970 nanoseconds +Max RSS: 243780 KB +Cache Misses: 316 +Cache References: 55377 +Instructions: 12524946 +Minor Page Faults: 0 +Major Page Faults: 0 diff --git a/scripts/dist-exps/ppolicy_plot.R b/scripts/dist-exps/ppolicy_plot.R new file mode 100644 index 0000000..af7a085 --- /dev/null +++ b/scripts/dist-exps/ppolicy_plot.R @@ -0,0 +1,45 @@ +library("optparse") +library("data.table") +library(dplyr) +library(ggplot2) +library(Rmisc) +library(RColorBrewer) +library(ggh4x) + +args=commandArgs(trailingOnly=TRUE) + +data_summary <- function(data, varname, groupnames) { + require(plyr) + summary_func <- function(x, col) { + c(mean = mean(x[[col]], na.rm=TRUE), sd = sd(x[[col]], na.rm=TRUE)) + } + data_sum <- ddply(data, groupnames, .fun=summary_func, varname) + data_sum <- rename(data_sum, c("mean" = varname)) + return (data_sum) +} + +num_hosts <- args[1] +data <- read.csv(args[2], header=T, fileEncoding="UTF-8-BOM") +data <- data %>% filter(data$Hosts == num_hosts) +data <- data_summary(data, varname="Algorithm_Duration", groupnames=c("Policy", "Hosts", "HostID")) +data$Policy <- factor(data$Policy, levels=c("oec", "divija", "cvc")) + +time_plot <- ggplot(data=data, aes(x=factor(Policy), fill=factor(HostID), y=Algorithm_Duration +)) + + geom_bar(mapping=aes(x=factor(Policy), fill=factor(HostID), y=Algorithm_Duration), + position=position_dodge(), stat="identity") + + labs(fill="Number of Hosts", y="Total Computation Time (s)", x="Graph Partitioning Policies")+ + theme(axis.title = element_text(color="black", size=20), + axis.text.y = element_text(color="black", size=20), + axis.text.x = element_text(color="black", size=20, angle=90), + axis.title.y = 
element_text(margin=margin(t=0, r=0, l=20, b=0), size=20), + axis.title.x = element_text(margin=margin(t=20, r=0, l=0, b=0), size=20)) + + #scale_fill_manual(values = c("oec"="#FF6600", "divija"="66", "cvc"="blue")) + + geom_text(aes(label=sprintf("%1.2f", Algorithm_Duration)), + position=position_dodge(1), vjust=0) + + #theme_minimal() + + geom_errorbar(aes(ymin=Algorithm_Duration + -sd, ymax=Algorithm_Duration + +sd), width=.2, position=position_dodge(.9)) + +ggsave(args[3], height=6, width=6) diff --git a/scripts/dist-exps/sample_perhost_bfs-pull_test.pdf b/scripts/dist-exps/sample_perhost_bfs-pull_test.pdf new file mode 100644 index 0000000..ef376b7 Binary files /dev/null and b/scripts/dist-exps/sample_perhost_bfs-pull_test.pdf differ diff --git a/scripts/dist-exps/sample_totale2e_bfs-pull_test.pdf b/scripts/dist-exps/sample_totale2e_bfs-pull_test.pdf new file mode 100644 index 0000000..f044d00 Binary files /dev/null and b/scripts/dist-exps/sample_totale2e_bfs-pull_test.pdf differ diff --git a/scripts/dist-exps/test_plot.sh b/scripts/dist-exps/test_plot.sh new file mode 100644 index 0000000..811598a --- /dev/null +++ b/scripts/dist-exps/test_plot.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +E2E_OUTPUT_DIR=$1 +PPOLICY_OUTPUT_DIR=$2 + +python total_e2e_csv.py --output $E2E_OUTPUT_DIR +Rscript totale2e_plot.R totale2e_bfs-pull_test.csv sample_totale2e_bfs-pull_test.pdf + +python perhost_partitioning_csv.py --output $PPOLICY_OUTPUT_DIR +Rscript ppolicy_plot.R 4 ppolicy_bfs-pull_test.csv sample_perhost_bfs-pull_test.pdf diff --git a/scripts/dist-exps/total_e2e_csv.py b/scripts/dist-exps/total_e2e_csv.py new file mode 100644 index 0000000..74b5f79 --- /dev/null +++ b/scripts/dist-exps/total_e2e_csv.py @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: BSD-2-Clause +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. 
# NOTE: subprocess, matplotlib and numpy were imported but never used anywhere
# in this file; they have been dropped (the log is now read directly).
import re
import csv
import argparse
import os


def parse(log_path: str, algo: str):
    """Extract per-batch ingestion/algorithm durations from one benchmark log.

    The log is scanned for records of the form
    "Benchmark <algo> ingestion results for batch N: ... Duration: D nanoseconds"
    (and the analogous "algorithm results" records).

    Args:
        log_path: path to the log file to parse.
        algo: algorithm name appearing in the benchmark lines (e.g. "bfs-pull").

    Returns:
        (ingestion_durations, algorithm_durations): two dicts mapping the
        batch number (int) to the duration converted to milliseconds (ns / 1e6).
    """
    print(f"Export {log_path} on {algo} to csv..")

    # Read the file directly. The original shelled out to `cat {log_path}`
    # with shell=True, which was slower, unsafe for paths containing shell
    # metacharacters, and silently yielded empty results for a missing file.
    # A missing log now raises FileNotFoundError instead of being swallowed.
    with open(log_path, 'r') as log_file:
        output = log_file.read()

    # DOTALL lets ".*?" span the newline between the header and Duration lines;
    # the non-greedy match stops at the first Duration following each header.
    ingestion_pattern = re.compile(
        rf"Benchmark {algo} ingestion results for batch (\d+):.*?Duration: (\d+) nanoseconds",
        re.DOTALL)
    algorithm_pattern = re.compile(
        rf"Benchmark {algo} algorithm results for batch (\d+):.*?Duration: (\d+) nanoseconds",
        re.DOTALL)

    ingestion_durations = {}
    algorithm_durations = {}

    for batch, duration in ingestion_pattern.findall(output):
        ingestion_durations[int(batch)] = int(duration) / 1000000  # ns -> ms

    for batch, duration in algorithm_pattern.findall(output):
        algorithm_durations[int(batch)] = int(duration) / 1000000  # ns -> ms

    return ingestion_durations, algorithm_durations


def save_totale2e_to_csv(results, filepath):
    """Flatten results[graph_type][hosts] = {'ingestion': {...}, 'algorithm': {...}}
    into a CSV with one row per (graph_type, hosts) pair.

    Only batch 0 is reported; a missing batch-0 entry is written as 0.
    """
    with open(filepath, 'w', newline='') as csvfile:
        fieldnames = ['Graph_Type', 'Hosts', 'Ingestion_Duration', 'Algorithm_Duration']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        print("results:", results)
        for graph_type, by_hosts in results.items():
            for hosts, data in by_hosts.items():
                # .get() avoids the KeyError the old `[0] if dict != {}` guard
                # hit when durations existed but batch 0 was absent.
                ingestion_duration = data['ingestion'].get(0, 0)
                algorithm_duration = data['algorithm'].get(0, 0)
                print("ing:", ingestion_duration, " algo:", algorithm_duration)
                writer.writerow({
                    'Graph_Type': graph_type,
                    'Hosts': hosts,
                    'Ingestion_Duration': ingestion_duration,
                    'Algorithm_Duration': algorithm_duration,
                })


def main():
    """Export total end-to-end benchmark results under --output to a CSV.

    Expects the following directory structure:

    e2e_[app_name]_[input_graph]
    |_ [graph_type1]
    |  |_ [num_hosts0]
    |  |  |_ total_e2e_[host_id].out
    |  |
    |  |_ [num_hosts1]
    |     |_ total_e2e_[host_id].out
    |
    |_ [graph_type2]
    ...
    """
    parser = argparse.ArgumentParser(description='Run and plot benchmark results based on command line flags.')
    # parser.add_argument('--ingest', action='store_true', help='Plot ingestion durations.')
    parser.add_argument('--output', type=str, help='Output file path to plot.', default=None)

    args = parser.parse_args()

    output_path = args.output
    if not output_path:
        print("No output file path provided. Use --output.")
        return

    # The output_path's last directory name format is e2e_[algo]_[input_graph].
    fname = os.path.basename(os.path.normpath(output_path))
    fname_split = fname.split('_')
    assert fname_split[0] == "e2e", " Output directory's prefix should be 'e2e'"
    algo_name = fname_split[1]
    input_name = fname_split[2]

    total_e2e_results = {}
    for graph in os.listdir(output_path):
        total_e2e_results[graph] = {}
        for hosts in os.listdir(os.path.join(output_path, graph)):
            total_e2e_results[graph][hosts] = {}

            # We have interest on e2e execution time per-host or/and all execution.
            # The e2e execution file names are total_e2e.out and host[host number]_e2e.out.
            # We do not parse per-batch results.
+ + total_e2e_fname = "total_e2e.out" + log_path = output_path+"/"+graph+"/"+hosts+"/"+total_e2e_fname + ingestion_durations, algorithm_durations = parse(log_path, algo_name) + total_e2e_results[graph][hosts] = { + 'ingestion': ingestion_durations, + 'algorithm': algorithm_durations + } + + save_totale2e_to_csv(total_e2e_results, f"totale2e_{algo_name}_{input_name}.csv") + + +if __name__ == "__main__": + main() diff --git a/scripts/dist-exps/totale2e_plot.R b/scripts/dist-exps/totale2e_plot.R new file mode 100644 index 0000000..133dccd --- /dev/null +++ b/scripts/dist-exps/totale2e_plot.R @@ -0,0 +1,43 @@ +library("optparse") +library("data.table") +library(dplyr) +library(ggplot2) +library(Rmisc) +library(RColorBrewer) +library(ggh4x) + +args=commandArgs(trailingOnly=TRUE) + +data_summary <- function(data, varname, groupnames) { + require(plyr) + summary_func <- function(x, col) { + c(mean = mean(x[[col]], na.rm=TRUE), sd = sd(x[[col]], na.rm=TRUE)) + } + data_sum <- ddply(data, groupnames, .fun=summary_func, varname) + data_sum <- rename(data_sum, c("mean" = varname)) + return (data_sum) +} + +data <- read.csv(args[1], header=T, fileEncoding="UTF-8-BOM") +data <- data_summary(data, varname="Algorithm_Duration", groupnames=c("Graph_Type", "Hosts")) +data$Graph_Type <- factor(data$Graph_Type, levels=c("adj", "lccsr", "lscsr")) + +time_plot <- ggplot(data=data, aes(x=factor(Hosts), fill=factor(Graph_Type), y=Algorithm_Duration +)) + + geom_bar(mapping=aes(x=factor(Hosts), fill=factor(Graph_Type), y=Algorithm_Duration), + position=position_dodge(), stat="identity") + + labs(fill="Graph Type", y="End-To-End Execution time (s)", x="Number of Hosts")+ + theme(axis.title = element_text(color="black", size=20), + axis.text.y = element_text(color="black", size=20), + axis.text.x = element_text(color="black", size=20), + axis.title.y = element_text(margin=margin(t=0, r=0, l=20, b=0), size=20), + axis.title.x = element_text(margin=margin(t=20, r=0, l=0, b=0), size=20)) + + 
scale_fill_manual(values = c("adj"="#FF6600", "lccsr"="66", "lscsr"="blue")) + + geom_text(aes(label=sprintf("%1.2f", Algorithm_Duration)), + position=position_dodge(1), vjust=0) + + #theme_minimal() + + geom_errorbar(aes(ymin=Algorithm_Duration + -sd, ymax=Algorithm_Duration + +sd), width=.2, position=position_dodge(.9)) + +ggsave(args[2], height=6, width=10)