Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions include/pando-lib-galois/utility/gptr_monad.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#ifndef PANDO_LIB_GALOIS_UTILITY_GPTR_MONAD_HPP_
#define PANDO_LIB_GALOIS_UTILITY_GPTR_MONAD_HPP_

#include <memory>
#include <utility>

/**
* @brief lifts a function with no arguments to work on references
*/
Expand Down Expand Up @@ -70,4 +72,45 @@ auto applyFunc(pando::GlobalRef<T> ref, F func) {
return func(obj);
}

#if 1
/*
 * Two-level token pasting: operands of `##` are not macro-expanded, so pasting
 * `__LINE__` directly yields the literal identifier suffix `__LINE__`. Routing the
 * paste through an extra macro layer lets `__LINE__` expand to its number first.
 */
#define GPTR_MONAD_PASTE_IMPL(a, b) a##b
#define GPTR_MONAD_PASTE(a, b) GPTR_MONAD_PASTE_IMPL(a, b)
#define GPTR_MONAD_UNIQUE(stem) GPTR_MONAD_PASTE(stem, __LINE__)

/**
 * @brief Calls the member function `func` on a local copy of the object designated by `ref`.
 *
 * The address of `ref` is taken once, the pointee is copied into a local whose type is
 * `std::pointer_traits<decltype(&(ref))>::element_type`, `func` is invoked on that copy with
 * the remaining arguments, and the call's result is the value of the whole expression.
 * Because the call runs on a copy, changes `func` makes to the object are not written back.
 *
 * Every local introduced by the expansion carries a line-number suffix so that caller
 * arguments named e.g. `tmp` or `ret` refer to the caller's variables, not to the macro's.
 *
 * Implemented as a GNU statement expression (`__extension__({ ... })`); `func` must return
 * a non-void value because the result is stored in an `auto` local.
 *
 * NOTE(review): a function-like macro named `apply` also rewrites any later `std::apply(...)`
 * call in a translation unit that includes this header.
 *
 * @param ref  lvalue (or reference-like expression) whose address can be taken
 * @param func name of the member function to call (a bare identifier, not a pointer)
 * @param ...  arguments forwarded textually to `func`
 */
#define apply(ref, func, ...) \
__extension__({ \
auto GPTR_MONAD_UNIQUE(gptrMonadPtr_) = &(ref); \
typename std::pointer_traits<decltype(GPTR_MONAD_UNIQUE(gptrMonadPtr_))>::element_type \
GPTR_MONAD_UNIQUE(gptrMonadObj_) = *GPTR_MONAD_UNIQUE(gptrMonadPtr_); \
auto GPTR_MONAD_UNIQUE(gptrMonadRet_) = GPTR_MONAD_UNIQUE(gptrMonadObj_).func(__VA_ARGS__); \
GPTR_MONAD_UNIQUE(gptrMonadRet_); \
})

#elif 0

/**
 * @brief Invokes the member-function pointer `func` on a local copy of the object read
 *        through `ref`.
 *
 * The referenced object is first converted into a local `T`; the call is made on that
 * local, so modifications performed by `func` are not written back through `ref`.
 *
 * @param ref  global reference to the object
 * @param func pointer to a member function of `T`
 * @param args arguments passed on to `func`
 * @return whatever `func` returns (by value, since the return type is deduced with `auto`)
 */
template <typename T, typename F, typename... Args>
auto apply(pando::GlobalRef<T> ref, F func, Args... args) {
  T localCopy = ref;
  return (localCopy.*func)(args...);
}

/**
 * @brief Invokes the member-function pointer `func` on a local copy of `ref`.
 *
 * The call is made on a copy of the referenced object, so modifications performed by
 * `func` are not visible in `ref` afterwards.
 *
 * @param ref  object to copy
 * @param func pointer to a member function of `T`
 * @param args arguments passed on to `func`
 * @return whatever `func` returns (by value, since the return type is deduced with `auto`)
 */
template <typename T, typename F, typename... Args>
auto apply(T& ref, F func, Args... args) {
  T localCopy = ref;
  return (localCopy.*func)(args...);
}
#else

template <typename R, typename... As, typename T>
R apply(pando::GlobalRef<T> ref, R (T::*func)(As...), As... args) {
T obj = ref;
return (obj.*func)(args...);
}

/**
 * @brief Invokes the member function `func` on a local copy of `ref`.
 *
 * The call arguments are deduced as their own pack (`Args`) instead of reusing the
 * method's parameter pack (`As`). Reusing `As` made the compiler deduce it from both the
 * method pointer and the arguments, so any argument that needed a conversion (for example
 * an `int` literal for a `long` parameter) was rejected as a deduction conflict.
 * Conversions now happen at the call to `func`.
 *
 * The call is made on a copy of the referenced object, so modifications performed by
 * `func` are not visible in `ref` afterwards.
 *
 * @param ref  object to copy
 * @param func pointer to a non-const member function of `T`
 * @param args arguments forwarded to `func`
 * @return the value returned by `func`
 */
template <typename R, typename... As, typename T, typename... Args>
R apply(T& ref, R (T::*func)(As...), Args&&... args) {
  T obj = ref;
  return (obj.*func)(std::forward<Args>(args)...);
}

#endif

#endif // PANDO_LIB_GALOIS_UTILITY_GPTR_MONAD_HPP_
7 changes: 4 additions & 3 deletions microbench/triangle-counting/include/tc_algos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "utils.hpp"

template <typename GraphType>
template <typename GraphType, bool binary_search>
void tc_no_chunk(pando::GlobalPtr<GraphType> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count);

Expand All @@ -15,11 +15,12 @@ void tc_chunk_edges(pando::GlobalPtr<GraphType> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count);
*/

template <typename GraphType>
template <typename GraphType, bool binary_search>
void tc_chunk_vertices(pando::GlobalPtr<GraphType> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count);

void HBMainTC(pando::Array<char> filename, int64_t num_vertices, bool load_balanced_graph,
TC_CHUNK tc_chunk, galois::DAccumulator<uint64_t> final_tri_count);
TC_CHUNK tc_chunk, bool binary_search,
galois::DAccumulator<uint64_t> final_tri_count);

#endif // TRIANGLE_COUNTING_INCLUDE_TC_ALGOS_HPP_
21 changes: 12 additions & 9 deletions microbench/triangle-counting/include/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,24 @@ struct CommandLineOptions {
int64_t num_vertices = 0;
bool load_balanced_graph = false;
TC_CHUNK tc_chunk = TC_CHUNK::NO_CHUNK;
bool binary_search = false;

/**
 * @brief Writes all option fields to standard output, one `name = value` line each.
 *
 * The opening and closing banner lines go through printf; the fields go through
 * std::cout in the order elFile, num_vertices, load_balanced_graph, tc_chunk,
 * binary_search.
 */
void print() {
  printf("******** CommandLineOptions ******** \n");
  std::cout << "elFile = " << elFile << '\n'
            << "num_vertices = " << num_vertices << '\n'
            << "load_balanced_graph = " << load_balanced_graph << '\n'
            << "tc_chunk = " << tc_chunk << '\n'
            << "binary_search = " << binary_search << '\n';
  printf("******** END CommandLineOptions ******** \n");
}

CommandLineOptions()
: elFile(""), num_vertices(0), load_balanced_graph(false), tc_chunk(TC_CHUNK::NO_CHUNK) {}
: elFile(""),
num_vertices(0),
load_balanced_graph(false),
tc_chunk(TC_CHUNK::NO_CHUNK),
binary_search(false) {}
};

std::unique_ptr<CommandLineOptions> read_cmd_line_args(int argc, char** argv);
Expand All @@ -63,7 +69,7 @@ void printUsage(char* argv0);
// CONNECTION KERNELS
// #####################################################################
template <typename GraphType>
void intersect_dag_merge(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<GraphType> graph_ptr,
void intersect_dag_merge(pando::GlobalPtr<GraphType> graph_ptr,
typename GraphType::VertexTopologyID v0,
typename GraphType::VertexTopologyID v1,
galois::DAccumulator<uint64_t> final_tri_count) {
Expand All @@ -90,12 +96,10 @@ void intersect_dag_merge(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<Gra
count++;
}
final_tri_count.add(count);
wgh.done();
}

template <typename GraphType>
void intersect_dag_merge_double_binary(galois::WaitGroup::HandleType wgh,
pando::GlobalPtr<GraphType> graph_ptr,
void intersect_dag_merge_double_binary(pando::GlobalPtr<GraphType> graph_ptr,
typename GraphType::VertexTopologyID v0,
typename GraphType::VertexTopologyID v1,
galois::DAccumulator<uint64_t> final_tri_count) {
Expand Down Expand Up @@ -144,7 +148,6 @@ void intersect_dag_merge_double_binary(galois::WaitGroup::HandleType wgh,
}
}
final_tri_count.add(count);
wgh.done();
}

// #####################################################################
Expand Down Expand Up @@ -212,17 +215,17 @@ void vertexset_intersection(pando::GlobalPtr<GraphType> graph_ptr,
auto [graph_ptr, v1, final_tri_count, connection_kernel, v1_token] = state;
GraphType g = *graph_ptr;
(void)eh; // Required to prevent -Werror=unused-parameter
return fmap(g, getLocalityVertex, v1);
return apply(g, getLocalityVertex, v1);
};

galois::doAll(
state, graph.edges(v0),
+[](decltype(state) state, typename GraphType::EdgeHandle eh) {
auto [graph_ptr, v1, final_tri_count, connection_kernel, v1_token] = state;
GraphType g = *graph_ptr;
typename GraphType::VertexTopologyID neighbor_of_v0 = fmap(g, getEdgeDst, eh);
typename GraphType::VertexTopologyID neighbor_of_v0 = apply(g, getEdgeDst, eh);
typename GraphType::VertexTokenID neighbor_of_v0_token =
fmap(g, getTokenID, neighbor_of_v0);
apply(g, getTokenID, neighbor_of_v0);

// Because of DAG optimization
if (neighbor_of_v0_token <= v1_token)
Expand Down
2 changes: 1 addition & 1 deletion microbench/triangle-counting/src/tc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ int pandoMain(int argc, char** argv) {
PANDO_CHECK(final_tri_count.initialize());

HBMainTC(filename, opts->num_vertices, opts->load_balanced_graph, opts->tc_chunk,
final_tri_count);
opts->binary_search, final_tri_count);
std::cout << "*** FINAL TRI COUNT = " << final_tri_count.reduce() << "\n";

#if BENCHMARK
Expand Down
92 changes: 60 additions & 32 deletions microbench/triangle-counting/src/tc_algos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Copyright (c) 2023. University of Texas at Austin. All rights reserved.

#include <tc_algos.hpp>
#include <type_traits>

// #####################################################################
// TC UTILS
Expand All @@ -13,33 +14,31 @@
* @param[in] graph_ptr Pointer to the in-memory graph
* @param[in] final_tri_count Thread-safe counter
*/
// NOTE(review): this span interleaves the pre-change and post-change lines of a diff
// (both template headers, both signatures and both call sequences appear), so it is not
// compilable exactly as shown.
//
// For every edge `eh` in `edge_range`, reads the destination vertex v1 with getEdgeDst and
// runs an intersection kernel on (v0, v1), passing `final_tri_count` along to it.
// Pre-change lines: a WaitGroup is created here, each task calls wgh.addOne() before
// intersect_dag_merge, and the function blocks on wg.wait().
// Post-change lines: the wait-group handle is supplied by the caller, and the
// `binary_search` template flag selects intersect_dag_merge_double_binary over
// intersect_dag_merge.
template <typename Graph>
void edge_tc_counting(pando::GlobalPtr<Graph> graph_ptr, typename Graph::VertexTopologyID v0,
typename Graph::EdgeRange edge_range,
template <typename Graph, bool binary_search>
void edge_tc_counting(galois::WaitGroup::HandleType wgh, pando::GlobalPtr<Graph> graph_ptr,
typename Graph::VertexTopologyID v0, typename Graph::EdgeRange edge_range,
galois::DAccumulator<uint64_t> final_tri_count) {
galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
// State handed to every per-edge task; unpacked again inside the task body.
auto innerState = galois::make_tpl(graph_ptr, v0, wgh, final_tri_count);
Graph graph = *graph_ptr;
galois::doAll(
wgh, innerState, edge_range,
+[](decltype(innerState) innerState, typename Graph::EdgeHandle eh) {
auto [graph_ptr, v0, wgh, final_tri_count] = innerState;
Graph g = *graph_ptr;
typename Graph::VertexTopologyID v1 = fmap(g, getEdgeDst, eh);
wgh.addOne();
intersect_dag_merge<Graph>(wgh, graph_ptr, v0, v1, final_tri_count);
typename Graph::VertexTopologyID v1 = apply(g, getEdgeDst, eh);
if (binary_search)
intersect_dag_merge_double_binary<Graph>(graph_ptr, v0, v1, final_tri_count);
else
intersect_dag_merge<Graph>(graph_ptr, v0, v1, final_tri_count);
},
// Second callable: returns a pando::Place per edge, namely the locality of whichever
// endpoint has more edges (v0 when the counts are equal).
// Presumably doAll uses it to decide where the task runs; confirm against galois::doAll.
[&graph](decltype(innerState) innerState, typename Graph::EdgeHandle eh) -> pando::Place {
auto v0 = std::get<1>(innerState);
typename Graph::VertexTopologyID v1 = fmap(graph, getEdgeDst, eh);
bool v0_higher_degree = fmap(graph, getNumEdges, v0) >= fmap(graph, getNumEdges, v1);
pando::Place locality = v0_higher_degree ? fmap(graph, getLocalityVertex, v0)
: fmap(graph, getLocalityVertex, v1);
typename Graph::VertexTopologyID v1 = apply(graph, getEdgeDst, eh);
bool v0_higher_degree = apply(graph, getNumEdges, v0) >= apply(graph, getNumEdges, v1);
pando::Place locality = v0_higher_degree ? apply(graph, getLocalityVertex, v0)
: apply(graph, getLocalityVertex, v1);
return locality;
});
PANDO_CHECK(wg.wait());
}

// #####################################################################
Expand All @@ -51,23 +50,32 @@ void edge_tc_counting(pando::GlobalPtr<Graph> graph_ptr, typename Graph::VertexT
* @param[in] graph_ptr Pointer to the in-memory graph
* @param[in] final_tri_count Thread-safe counter
*/
// NOTE(review): this span interleaves the pre-change and post-change lines of a diff, so
// it is not compilable exactly as shown.
//
// Runs galois::doAll over graph.vertices(). Each per-vertex task skips vertices whose edge
// count is below TC_EMBEDDING_SZ - 1 and otherwise passes the vertex and its edge range to
// edge_tc_counting. The post-change lines create a galois::WaitGroup, hand its handle to
// doAll and to edge_tc_counting, block on wg.wait(), and then deinitialize the wait group.
template <typename GraphType>
template <typename GraphType, bool binary_search>
void tc_no_chunk(pando::GlobalPtr<GraphType> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count) {
GraphType graph = *graph_ptr;
auto state = galois::make_tpl(graph_ptr, final_tri_count);

// Wait group whose handle travels inside `state` to every per-vertex task.
galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
auto state = galois::make_tpl(graph_ptr, final_tri_count, wgh);

galois::doAll(
state, graph.vertices(), +[](decltype(state) state, typename GraphType::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count] = state;
wgh, state, graph.vertices(),
+[](decltype(state) state, typename GraphType::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count, wgh] = state;
GraphType graph = *graph_ptr;

// Degree Filtering Optimization
uint64_t v0_degree = graph.getNumEdges(v0);
if (v0_degree < (TC_EMBEDDING_SZ - 1))
return;

edge_tc_counting<GraphType>(graph_ptr, v0, graph.edges(v0), final_tri_count);
edge_tc_counting<GraphType, binary_search>(wgh, graph_ptr, v0, graph.edges(v0),
final_tri_count);
});
// Block until the work registered on `wg` has finished, then release the wait group.
PANDO_CHECK(wg.wait());
wg.deinitialize();
}

/**
Expand Down Expand Up @@ -131,8 +139,11 @@ void tc_chunk_edges(pando::GlobalPtr<GraphDL> graph_ptr,
* @param[in] graph_ptr Pointer to the in-memory graph
* @param[in] final_tri_count Thread-safe counter
*/
template <bool binary_search>
void tc_chunk_vertices(pando::GlobalPtr<GraphDL> graph_ptr,
galois::DAccumulator<uint64_t> final_tri_count) {
using LCSR = galois::LCSR<VT, ET>;

GraphDL graph = *graph_ptr;
uint64_t query_sz = 1;
uint64_t iters = 0;
Expand All @@ -159,27 +170,33 @@ void tc_chunk_vertices(pando::GlobalPtr<GraphDL> graph_ptr,
auto lcsr = graph.getLocalCSR();
uint64_t host_vertex_iter_offset = host_vertex_iter_offset_ref;

auto inner_state = galois::make_tpl(graph_ptr, final_tri_count);
galois::WaitGroup wg;
PANDO_CHECK(wg.initialize(0));
auto wgh = wg.getHandle();
auto inner_state = galois::make_tpl(graph_ptr, final_tri_count, wgh);
galois::doAll(
inner_state, fmap(lcsr, vertices, host_vertex_iter_offset, query_sz),
inner_state, apply(lcsr, vertices, host_vertex_iter_offset, query_sz),
+[](decltype(inner_state) inner_state, typename GraphDL::VertexTopologyID v0) {
auto [graph_ptr, final_tri_count] = inner_state;
auto [graph_ptr, final_tri_count, wgh] = inner_state;
GraphDL graph = *graph_ptr;

// Degree Filtering Optimization
uint64_t v0_degree = graph.getNumEdges(v0);
if (v0_degree < (TC_EMBEDDING_SZ - 1))
return;

edge_tc_counting<GraphDL>(graph_ptr, v0, graph.edges(v0), final_tri_count);
edge_tc_counting<GraphDL, binary_search>(wgh, graph_ptr, v0, graph.edges(v0),
final_tri_count);
});
PANDO_CHECK(wg.wait());

// Move iter offset
uint64_t lcsr_num_vertices = fmap(lcsr, size);
uint64_t lcsr_num_vertices = apply(lcsr, size);
host_vertex_iter_offset += query_sz;
if (host_vertex_iter_offset < lcsr_num_vertices)
work_remaining.increment();
host_vertex_iter_offset_ref = host_vertex_iter_offset;
wg.deinitialize();
});

uint64_t current_count = final_tri_count.reduce();
Expand All @@ -198,7 +215,8 @@ void tc_chunk_vertices(pando::GlobalPtr<GraphDL> graph_ptr,
// TC GRAPH HBMAINS
// #####################################################################
void HBGraphDL(pando::Place thisPlace, pando::Array<char> filename, int64_t num_vertices,
TC_CHUNK tc_chunk, galois::DAccumulator<uint64_t> final_tri_count) {
TC_CHUNK tc_chunk, bool binary_search,
galois::DAccumulator<uint64_t> final_tri_count) {
#if BENCHMARK
auto time_graph_import_st = std::chrono::high_resolution_clock().now();
#endif
Expand Down Expand Up @@ -227,15 +245,21 @@ void HBGraphDL(pando::Place thisPlace, pando::Array<char> filename, int64_t num_

switch (tc_chunk) {
case TC_CHUNK::CHUNK_VERTICES:
tc_chunk_vertices(graph_ptr, final_tri_count);
if (binary_search)
tc_chunk_vertices<true>(graph_ptr, final_tri_count);
else
tc_chunk_vertices<false>(graph_ptr, final_tri_count);
break;
/**
case TC_CHUNK::CHUNK_EDGES:
tc_chunk_edges(graph_ptr, final_tri_count);
break;
*/
default:
tc_no_chunk<GraphDL>(graph_ptr, final_tri_count);
if (binary_search)
tc_no_chunk<GraphDL, true>(graph_ptr, final_tri_count);
else
tc_no_chunk<GraphDL, false>(graph_ptr, final_tri_count);
break;
}

Expand All @@ -253,7 +277,7 @@ void HBGraphDL(pando::Place thisPlace, pando::Array<char> filename, int64_t num_
}

void HBGraphDA(pando::Place thisPlace, pando::Array<char> filename, int64_t num_vertices,
galois::DAccumulator<uint64_t> final_tri_count) {
bool binary_search, galois::DAccumulator<uint64_t> final_tri_count) {
#if BENCHMARK
auto time_graph_import_st = std::chrono::high_resolution_clock().now();
#endif
Expand All @@ -280,7 +304,10 @@ void HBGraphDA(pando::Place thisPlace, pando::Array<char> filename, int64_t num_
auto time_tc_algo_st = std::chrono::high_resolution_clock().now();
#endif
PANDO_MEM_STAT_NEW_KERNEL("TC_DFS_Algo Start");
tc_no_chunk<GraphDA>(graph_ptr, final_tri_count);
if (binary_search)
tc_no_chunk<GraphDA, true>(graph_ptr, final_tri_count);
else
tc_no_chunk<GraphDA, false>(graph_ptr, final_tri_count);
#if BENCHMARK
auto time_tc_algo_end = std::chrono::high_resolution_clock().now();
if (thisPlace.node.id == COORDINATOR_ID)
Expand All @@ -295,11 +322,12 @@ void HBGraphDA(pando::Place thisPlace, pando::Array<char> filename, int64_t num_
}

// NOTE(review): this span interleaves the pre-change and post-change lines of a diff, so
// it is not compilable exactly as shown.
//
// Reads the current place with pando::getCurrentPlace() and forwards the run to HBGraphDL
// when `load_balanced_graph` is true, otherwise to HBGraphDA. `tc_chunk` is passed only to
// HBGraphDL; the post-change lines additionally pass `binary_search` to both.
void HBMainTC(pando::Array<char> filename, int64_t num_vertices, bool load_balanced_graph,
TC_CHUNK tc_chunk, galois::DAccumulator<uint64_t> final_tri_count) {
TC_CHUNK tc_chunk, bool binary_search,
galois::DAccumulator<uint64_t> final_tri_count) {
auto thisPlace = pando::getCurrentPlace();

if (load_balanced_graph)
HBGraphDL(thisPlace, filename, num_vertices, tc_chunk, final_tri_count);
HBGraphDL(thisPlace, filename, num_vertices, tc_chunk, binary_search, final_tri_count);
else
HBGraphDA(thisPlace, filename, num_vertices, final_tri_count);
HBGraphDA(thisPlace, filename, num_vertices, binary_search, final_tri_count);
}
Loading