diff --git a/CMakeLists.txt b/CMakeLists.txt index 38d34684..95e90896 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,8 +107,7 @@ add_library(GraphZeppelinVerifyCC src/cc_alg_configuration.cpp src/sketch.cpp src/util.cpp - test/util/file_graph_verifier.cpp - test/util/mat_graph_verifier.cpp) + test/util/graph_verifier.cpp) add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities) target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities) target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/) @@ -123,7 +122,6 @@ if (BUILD_EXE) test/sketch_test.cpp test/dsu_test.cpp test/util_test.cpp - test/util/file_graph_verifier.cpp test/util/graph_verifier_test.cpp) add_dependencies(tests GraphZeppelinVerifyCC) target_link_libraries(tests PRIVATE GraphZeppelinVerifyCC) diff --git a/README.md b/README.md index 625f5464..121a9a80 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ int main() { DriverConfiguration() // configuration }; driver.process_stream_until(END_OF_STREAM); // Tell the driver to process the entire graph stream - driver.prep_query(); // Ensure that all updates have been processed + driver.prep_query(CONNECTIVITY); // Ensure algorithm is ready for a connectivity query auto CC = cc_alg.connected_components(); // Extract the connected components } ``` diff --git a/include/cc_sketch_alg.h b/include/cc_sketch_alg.h index b47795a4..9e9d3f8c 100644 --- a/include/cc_sketch_alg.h +++ b/include/cc_sketch_alg.h @@ -56,6 +56,12 @@ struct alignas(64) GlobalMergeData { } }; +// What type of query is the user going to perform. 
Used for has_cached_query() +enum QueryCode { + CONNECTIVITY, // connected components and spanning forest of graph + KSPANNINGFORESTS, // k disjoint spanning forests +}; + /** * Algorithm for computing connected components on undirected graph streams * (no self-edges or multi-edges) @@ -85,6 +91,11 @@ class CCSketchAlg { Sketch **delta_sketches = nullptr; size_t num_delta_sketches; + CCAlgConfiguration config; +#ifdef VERIFY_SAMPLES_F + std::unique_ptr verifier; +#endif + /** * Run the first round of Boruvka. We can do things faster here because we know there will * be no merging we have to do. @@ -92,13 +103,13 @@ class CCSketchAlg { bool run_round_zero(); /** - * Update the query array with new samples - * @param query an array of sketch sample results - * @param reps an array containing node indices for the representative of each supernode + * Sample a single supernode represented by a single sketch containing one or more vertices. + * Updates the dsu and spanning forest with query results if edge contains new connectivity info. + * @param skt sketch to sample + * @return [bool] true if the query result indicates we should run an additional round. */ bool sample_supernode(Sketch &skt); - /** * Calculate the instructions for what vertices to merge to form each component */ @@ -117,10 +128,6 @@ class CCSketchAlg { */ void boruvka_emulation(); - FRIEND_TEST(GraphTestSuite, TestCorrectnessOfReheating); - - CCAlgConfiguration config; - // constructor for use when reading from a serialized file CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream, CCAlgConfiguration config); @@ -174,7 +181,13 @@ class CCSketchAlg { * Return if we have cached an answer to query. * This allows the driver to avoid flushing the gutters before calling query functions. 
*/ - bool has_cached_query() { return shared_dsu_valid; } + bool has_cached_query(int query_code) { + QueryCode code = (QueryCode) query_code; + if (code == CONNECTIVITY) + return shared_dsu_valid; + else + return false; + } /** * Print the configuration of the connected components graph sketching. @@ -201,7 +214,7 @@ class CCSketchAlg { /** * Main parallel query algorithm utilizing Boruvka and L_0 sampling. - * @return a vector of the connected components in the graph. + * @return the connected components in the graph. */ ConnectedComponents connected_components(); @@ -217,12 +230,11 @@ class CCSketchAlg { * Return a spanning forest of the graph utilizing Boruvka and L_0 sampling * IMPORTANT: The updates to this algorithm MUST NOT be a function of the output of this query * that is, unless you really know what you're doing. - * @return an adjacency list representation of the spanning forest of the graph + * @return the spanning forest of the graph */ SpanningForest calc_spanning_forest(); #ifdef VERIFY_SAMPLES_F - std::unique_ptr verifier; void set_verifier(std::unique_ptr verifier) { this->verifier = std::move(verifier); } diff --git a/include/graph_sketch_driver.h b/include/graph_sketch_driver.h index 38f77eba..e627e443 100644 --- a/include/graph_sketch_driver.h +++ b/include/graph_sketch_driver.h @@ -1,4 +1,4 @@ - +#pragma once #include #include #include @@ -6,41 +6,66 @@ #include "driver_configuration.h" #include "graph_stream.h" #include "worker_thread_group.h" +#ifdef VERIFY_SAMPLES_F +#include "graph_verifier.h" +#endif + +class DriverException : public std::exception { + private: + std::string err_msg; + public: + DriverException(std::string msg) : err_msg(msg) {} + virtual const char* what() const throw() { + return err_msg.c_str(); + } +}; /** * GraphSketchDriver class: * Driver for sketching algorithms on a single machine. * Templatized by the "top level" sketching algorithm to manage. 
* - * Algorithms need to implement the following functions to be managed by the driver + * Algorithms need to implement the following functions to be managed by the driver: * * 1) void allocate_worker_memory(size_t num_workers) * For performance reasons it is often helpful for the algorithm to allocate some scratch - * space to be used by an individual worker threads. For example, in the connected - * components algorithm, we allocate a delta sketch for each worker. + * space to be used by individual worker threads. This scratch memory is managed by the + * algorithm. For example, in the connected components algorithm, we allocate a delta + * sketch for each worker. * * 2) size_t get_desired_updates_per_batch() * Return the number of updates the algorithm would like us to batch. This serves as the - * maximum number of updates in a batch. We only provide smaller batches if force_flush'd + * maximum number of updates in a batch. We only provide smaller batches during + * prep_query() * * 3) node_id_t get_num_vertices() * Returns the number of vertices in the Graph or an appropriate upper bound. * * 4) void pre_insert(GraphUpdate upd, node_id_t thr_id) * Called before each update is added to the guttering system for the purpose of eager - * query heuristics. This function must be fast executing. + * query heuristics. This function must be thread-safe and fast executing. The algorithm + * may choose to make this function a no-op. * * 5) void apply_update_batch(size_t thr_id, node_id_t src_vertex, const std::vector * &dst_vertices) - * Called by worker threads to apply a batch of updates destined for a single vertex. + * Called by worker threads to apply a batch of updates destined for a single vertex. This + * function must be thread-safe. * - * 6) bool has_cached_query() - * Check if the algorithm already has a cached answer for its query type. If so, the driver - * can skip flushing the updates and applying them in prep_query(). 
+ * 6) bool has_cached_query(int query_type) + * Check if the algorithm already has a cached answer for a given query type. If so, the + * driver can skip flushing the updates and applying them in prep_query(). The query_type + * should be defined by the algorithm as an enum (see cc_sketch_alg.h) but is typed in this + * code as an integer to ensure compatibility across algorithms. * * 7) void print_configuration() * Print the configuration of the algorithm. The algorithm may choose to print the * configurations of subalgorithms as well. + * + * 8) void set_verifier(std::unique_ptr verifier); + * If VERIFY_SAMPLES_F is defined, then the driver provides the algorithm with a + * verifier. The verifier encodes the graph state at the time of a query losslessly + * and should be used by the algorithm to check its query answer. This is only used for + * correctness testing, not for production code. */ template class GraphSketchDriver { @@ -48,6 +73,10 @@ class GraphSketchDriver { GutteringSystem *gts; Alg *sketching_alg; GraphStream *stream; +#ifdef VERIFY_SAMPLES_F + GraphVerifier *verifier; + std::mutex verifier_mtx; +#endif WorkerThreadGroup *worker_threads; @@ -55,7 +84,6 @@ class GraphSketchDriver { static constexpr size_t update_array_size = 4000; std::atomic total_updates; - FRIEND_TEST(GraphTest, TestSupernodeRestoreAfterCCFailure); public: GraphSketchDriver(Alg *sketching_alg, GraphStream *stream, DriverConfiguration config, size_t num_stream_threads = 1) @@ -83,10 +111,14 @@ class GraphSketchDriver { sketching_alg->print_configuration(); if (num_stream_threads > 1 && !stream->get_update_is_thread_safe()) { - std::cerr << "WARNING: stream get_update is not thread safe. Setting num inserters to 1" - << std::endl; + std::cerr + << "WARNING: stream get_update is not thread safe. 
Setting number of stream threads to 1" + << std::endl; num_stream_threads = 1; } +#ifdef VERIFY_SAMPLES_F + verifier = new GraphVerifier(sketching_alg->get_num_vertices()); +#endif total_updates = 0; std::cout << std::endl; @@ -95,17 +127,29 @@ class GraphSketchDriver { ~GraphSketchDriver() { delete worker_threads; delete gts; +#ifdef VERIFY_SAMPLES_F + delete verifier; +#endif } + /** + * Processes the stream until a given edge index, at which point the function returns + * @param break_edge_idx the breakpoint edge index. All updates up to but not including this + * index are processed by this call. + * @throws DriverException if we cannot set the requested breakpoint. + */ void process_stream_until(edge_id_t break_edge_idx) { if (!stream->set_break_point(break_edge_idx)) { - std::cerr << "ERROR: COULD NOT CORRECTLY SET BREAKPOINT!" << std::endl; + throw DriverException("Could not correctly set breakpoint: " + std::to_string(break_edge_idx)); exit(EXIT_FAILURE); } worker_threads->resume_workers(); auto task = [&](int thr_id) { GraphStreamUpdate update_array[update_array_size]; +#ifdef VERIFY_SAMPLES_F + GraphVerifier local_verifier(sketching_alg->get_num_vertices()); +#endif while (true) { size_t updates = stream->get_update_buffer(update_array, update_array_size); @@ -114,6 +158,11 @@ class GraphSketchDriver { upd.edge = update_array[i].edge; upd.type = static_cast(update_array[i].type); if (upd.type == BREAKPOINT) { + // reached the breakpoint. 
Update verifier if applicable and return +#ifdef VERIFY_SAMPLES_F + std::lock_guard lk(verifier_mtx); + verifier->combine(local_verifier); +#endif return; } else { @@ -121,6 +170,9 @@ class GraphSketchDriver { Edge edge = upd.edge; gts->insert({edge.src, edge.dst}, thr_id); gts->insert({edge.dst, edge.src}, thr_id); +#ifdef VERIFY_SAMPLES_F + local_verifier.edge_update(edge); +#endif } } } @@ -131,10 +183,15 @@ class GraphSketchDriver { // wait for threads to finish for (size_t i = 0; i < num_stream_threads; i++) threads[i].join(); + + // pass the verifier to the algorithm +#ifdef VERIFY_SAMPLES_F + sketching_alg->set_verifier(std::make_unique(*verifier)); +#endif } - void prep_query() { - if (sketching_alg->has_cached_query()) { + void prep_query(int query_code) { + if (sketching_alg->has_cached_query(query_code)) { flush_start = flush_end = std::chrono::steady_clock::now(); return; } @@ -151,6 +208,19 @@ class GraphSketchDriver { sketching_alg->apply_update_batch(thr_id, src_vertex, dst_vertices); } +#ifdef VERIFY_SAMPLES_F + /** + * checks that the verifier we constructed in process_stream_until matches another verifier + * @param expected the ground truth verifier + * @throws DriverException if the verifiers do not match + */ + void check_verifier(const GraphVerifier &expected) { + if (*verifier != expected) { + throw DriverException("Mismatch between driver verifier and expected verifier"); + } + } +#endif + size_t get_total_updates() { return total_updates.load(); } // time hooks for experiments diff --git a/include/return_types.h b/include/return_types.h index d7967c2e..b329bfe6 100644 --- a/include/return_types.h +++ b/include/return_types.h @@ -1,4 +1,5 @@ // This file defines the query return types from the cc algorithm class +#pragma once #include #include #include @@ -20,8 +21,8 @@ class ConnectedComponents { ~ConnectedComponents(); std::vector> get_component_sets(); - bool is_connected(node_id_t a, node_id_t b) { return parent_arr[a] == parent_arr[b]; 
} - node_id_t size() { return num_cc; } + bool is_connected(node_id_t a, node_id_t b) const { return parent_arr[a] == parent_arr[b]; } + node_id_t size() const { return num_cc; } }; // This class defines a spanning forest of a graph @@ -32,5 +33,5 @@ class SpanningForest { public: SpanningForest(node_id_t num_vertices, const std::unordered_set *spanning_forest); - const std::vector& get_edges() { return edges; } + const std::vector& get_edges() const { return edges; } }; diff --git a/include/sketch.h b/include/sketch.h index 1b50ed30..80473ecc 100644 --- a/include/sketch.h +++ b/include/sketch.h @@ -72,7 +72,7 @@ class Sketch { * @return The number of samples */ static size_t calc_cc_samples(node_id_t num_vertices, double f) { - return ceil(f * log2(num_vertices) / num_samples_div); + return std::max(size_t(18), (size_t) ceil(f * log2(num_vertices) / num_samples_div)); } /** @@ -191,8 +191,14 @@ class Sketch { }; class OutOfSamplesException : public std::exception { + private: + std::string err_msg; public: + OutOfSamplesException(size_t seed, size_t num_samples, size_t sample_idx) + : err_msg("This sketch (seed=" + std::to_string(seed) + + ", max samples=" + std::to_string(num_samples) + + ") cannot be sampled more times (cur idx=" + std::to_string(sample_idx) + ")!") {} virtual const char* what() const throw() { - return "This sketch cannot be sampled more times!"; + return err_msg.c_str(); } }; diff --git a/include/test/file_graph_verifier.h b/include/test/file_graph_verifier.h deleted file mode 100644 index 733548dc..00000000 --- a/include/test/file_graph_verifier.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "graph_verifier.h" - -#include - -#include "dsu.h" - -/** - * A plugin for the Graph class that runs Boruvka alongside the graph algorithm - * and verifies the edges and connected components that the graph algorithm - * generates. Takes a reference graph from a file. 
- */ -class FileGraphVerifier : public GraphVerifier { - std::vector> kruskal_ref; - -public: - FileGraphVerifier(node_id_t n, const std::string& input_file); - - void verify_edge(Edge edge); - void verify_soln(std::vector>& retval); - - /** - * Runs Kruskal's (deterministic) CC algo. - * @param input_file the file to read input from. - * @return an array of connected components. - */ - static std::vector> kruskal(const std::string& input_file = "cumul_sample.txt"); -}; diff --git a/include/test/graph_verifier.h b/include/test/graph_verifier.h index 7142f981..b4e445b7 100644 --- a/include/test/graph_verifier.h +++ b/include/test/graph_verifier.h @@ -3,6 +3,7 @@ #include #include "types.h" +#include "return_types.h" /** * A plugin for the Graph class that runs Boruvka alongside the graph algorithm @@ -10,39 +11,98 @@ * generates. */ class GraphVerifier { -protected: +private: + const node_id_t num_vertices; std::vector> adj_matrix; + DisjointSetUnion kruskal_dsu; + node_id_t kruskal_ccs; + bool need_query_compute = true; + /** + * Runs Kruskal's (deterministic) CC algo to compute the kruskal dsu. + */ + void kruskal(); public: + /** + * Empty Graph Verifier constructor + */ + GraphVerifier(node_id_t vertices); + + /** + * Construct GraphVerifier from a cumulative stream file + */ + GraphVerifier(node_id_t num_vertices, const std::string &cumul_file_name); + + /** + * Copy a GraphVerifier + */ + GraphVerifier(const GraphVerifier &oth_verifier) + : num_vertices(oth_verifier.num_vertices), + adj_matrix(oth_verifier.adj_matrix), + kruskal_dsu(oth_verifier.kruskal_dsu) {}; + + /** + * Flip an edge in the adjacency list. + * @param edge the edge to flip + */ + void edge_update(Edge edge); + /** * Verifies an edge exists in the graph. * @param edge the edge to be tested. * @throws BadEdgeException if the edge does not exist in the graph. */ - virtual void verify_edge(Edge edge) = 0; + void verify_edge(Edge edge); /** * Verifies the connected components solution is correct. 
Compares * retval against kruskal_ref. + * @throws IncorrectCCException if the solution cannot be verified */ - virtual void verify_soln(std::vector> &retval) = 0; + void verify_connected_components(const ConnectedComponents &cc); - std::vector> extract_adj_matrix() { return adj_matrix; } + /** + * Verifies that one or more spanning forests are valid + * Additionally, enforces that spanning forests must be edge disjoint. + * @param SFs the spanning forests to check + * @throws IncorrectForestException if a bad spanning forest is found + */ + void verify_spanning_forests(std::vector SFs); - GraphVerifier() = default; - GraphVerifier(std::vector> _adj) : adj_matrix(std::move(_adj)) {}; + /* + * Merge two GraphVerifiers that have seen two different streams. + * Yields a GraphVerifier that has seen both streams. + * @param oth a GraphVerifier to combine into this one. + */ + void combine(const GraphVerifier &oth); + + std::vector> extract_adj_matrix() { return adj_matrix; } + node_id_t get_num_kruskal_ccs() { return kruskal_ccs; } - virtual ~GraphVerifier() {}; + bool operator==(const GraphVerifier &oth) { return adj_matrix == oth.adj_matrix; } + bool operator!=(const GraphVerifier &oth) { return !(*this == oth); } }; class BadEdgeException : public std::exception { - virtual const char* what() const throw() { - return "The edge is not in the cut of the sample!"; - } + private: + std::string err_msg; + public: + BadEdgeException(std::string err) : err_msg(err) {}; + virtual const char* what() const throw() { return err_msg.c_str(); } }; class IncorrectCCException : public std::exception { - virtual const char* what() const throw() { - return "The connected components are incorrect!"; - } + private: + std::string err_msg; + public: + IncorrectCCException(std::string err) : err_msg(err) {}; + virtual const char* what() const throw() { return err_msg.c_str(); } +}; + +class IncorrectForestException : public std::exception { + private: + std::string err_msg; + public: + 
IncorrectForestException(std::string err) : err_msg(err) {}; + virtual const char* what() const throw() { return err_msg.c_str(); } }; diff --git a/include/test/mat_graph_verifier.h b/include/test/mat_graph_verifier.h deleted file mode 100644 index 1b028f2b..00000000 --- a/include/test/mat_graph_verifier.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#include "graph_verifier.h" - -#include - -#include "dsu.h" - -/** - * A plugin for the Graph class that runs Boruvka alongside the graph algorithm - * and verifies the edges and connected components that the graph algorithm - * generates. Takes a reference graph from a packed in-memory adjacency matrix. - */ -class MatGraphVerifier : public GraphVerifier { - std::vector> kruskal_ref; - node_id_t n; - - /** - * Runs Kruskal's (deterministic) CC algo. - * @param input_file the file to read input from. - * @return an array of connected components. - */ - std::vector> kruskal(); -public: - MatGraphVerifier(node_id_t n); - - // When we want to build a MatGraphVerifier without iterative edge_updates - MatGraphVerifier(node_id_t n, std::vector> _adj) - : GraphVerifier(_adj), n(n) { reset_cc_state(); }; - - void reset_cc_state(); // run this function before using as a verifier in CC - void edge_update(node_id_t src, node_id_t dst); - - void verify_edge(Edge edge); - void verify_soln(std::vector> &retval); -}; diff --git a/src/cc_sketch_alg.cpp b/src/cc_sketch_alg.cpp index ace96bce..a1e688db 100644 --- a/src/cc_sketch_alg.cpp +++ b/src/cc_sketch_alg.cpp @@ -274,9 +274,6 @@ bool CCSketchAlg::perform_boruvka_round(const size_t cur_round, bool local_except = false; std::exception_ptr local_err; - // node_id_t left_root = merge_instr[start].root; - // node_id_t right_root = merge_instr[end - 1].root; - bool root_from_left = false; if (start > 0) { root_from_left = merge_instr[start - 1].root == merge_instr[start].root; @@ -553,8 +550,7 @@ ConnectedComponents CCSketchAlg::connected_components() { ConnectedComponents 
cc(num_vertices, dsu); #ifdef VERIFY_SAMPLES_F - auto cc_sets = cc.get_component_sets(); - verifier->verify_soln(cc_sets); + verifier->verify_connected_components(cc); #endif cc_alg_end = std::chrono::steady_clock::now(); return cc; @@ -564,7 +560,11 @@ SpanningForest CCSketchAlg::calc_spanning_forest() { // TODO: Could probably optimize this a bit by writing new code connected_components(); - return SpanningForest(num_vertices, spanning_forest); + SpanningForest ret(num_vertices, spanning_forest); +#ifdef VERIFY_SAMPLES_F + verifier->verify_spanning_forests(std::vector{ret}); +#endif + return ret; } bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { @@ -584,7 +584,6 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { else { bool except = false; std::exception_ptr err; - bool ret; try { boruvka_emulation(); } catch (...) { @@ -604,8 +603,7 @@ bool CCSketchAlg::point_query(node_id_t a, node_id_t b) { #ifdef VERIFY_SAMPLES_F ConnectedComponents cc(num_vertices, dsu); - auto cc_sets = cc.get_component_sets(); - verifier->verify_soln(cc_sets); + verifier->verify_connected_components(cc); #endif bool retval = (dsu.find_root(a) == dsu.find_root(b)); diff --git a/src/sketch.cpp b/src/sketch.cpp index 0c687fa1..ac674c5e 100644 --- a/src/sketch.cpp +++ b/src/sketch.cpp @@ -93,7 +93,7 @@ void Sketch::zero_contents() { SketchSample Sketch::sample() { if (sample_idx >= num_samples) { - throw OutOfSamplesException(); + throw OutOfSamplesException(seed, num_samples, sample_idx); } size_t idx = sample_idx++; @@ -117,7 +117,7 @@ SketchSample Sketch::sample() { ExhaustiveSketchSample Sketch::exhaustive_sample() { if (sample_idx >= num_samples) { - throw OutOfSamplesException(); + throw OutOfSamplesException(seed, num_samples, sample_idx); } std::unordered_set ret; diff --git a/test/cc_alg_test.cpp b/test/cc_alg_test.cpp index a92cd406..5f395b01 100644 --- a/test/cc_alg_test.cpp +++ b/test/cc_alg_test.cpp @@ -7,13 +7,14 @@ #include #include "cc_sketch_alg.h" 
-#include "file_graph_verifier.h" #include "graph_sketch_driver.h" -#include "mat_graph_verifier.h" +#include "graph_verifier.h" static size_t get_seed() { auto now = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(now.time_since_epoch()).count(); + size_t s = std::chrono::duration_cast(now.time_since_epoch()).count(); + std::cout << "Seed = " << s << std::endl; + return s; } // helper function to generate a dynamic binary stream and its cumulative insert only stream @@ -31,16 +32,6 @@ void generate_stream(size_t seed, node_id_t num_vertices, double density, double dy_stream.write_cumulative_file(cumul_name); } -/** - * For many of these tests (especially for those upon very sparse and small graphs) - * we allow for a certain number of failures per test. - * This is because the responsibility of these tests is to quickly alert us - * to “this code is very wrong” whereas the statistical testing is responsible - * for a more fine grained analysis. - * In this context a false positive is much worse than a false negative. - * With 2 failures allowed per test our entire testing suite should fail 1/5000 runs. 
- */ - // We create this class and instantiate a paramaterized test suite so that we // can run these tests both with the GutterTree and with StandAloneGutters class CCAlgTest : public testing::TestWithParam {}; @@ -56,12 +47,12 @@ TEST_P(CCAlgTest, SmallGraphConnectivity) { node_id_t num_nodes = stream.vertices(); CCSketchAlg cc_alg{num_nodes, get_seed()}; - cc_alg.set_verifier( - std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, curr_dir + "/res/multiples_graph_1024.txt")); + ASSERT_EQ(78, cc_alg.connected_components().size()); } @@ -74,11 +65,11 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallRandomGraphs) { node_id_t num_nodes = stream.vertices(); CCSketchAlg cc_alg{num_nodes, get_seed()}; - cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); cc_alg.connected_components(); } @@ -93,11 +84,11 @@ TEST_P(CCAlgTest, TestCorrectnessOnSmallSparseGraphs) { node_id_t num_nodes = stream.vertices(); CCSketchAlg cc_alg{num_nodes, get_seed()}; - cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); cc_alg.connected_components(); } @@ -113,11 +104,11 @@ TEST_P(CCAlgTest, TestCorrectnessOfReheating) { node_id_t num_nodes = stream.vertices(); CCSketchAlg cc_alg{num_nodes, get_seed()}; - cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver 
driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); cc_alg.write_binary("./out_temp.txt"); std::vector> orig_cc; @@ -125,7 +116,7 @@ TEST_P(CCAlgTest, TestCorrectnessOfReheating) { printf("number of CC = %lu\n", orig_cc.size()); CCSketchAlg *reheat_alg = CCSketchAlg::construct_from_serialized_data("./out_temp.txt"); - reheat_alg->set_verifier(std::make_unique(1024, "./cumul_sample.txt")); + reheat_alg->set_verifier(std::make_unique(1024, "./cumul_sample.txt")); auto reheat_cc = reheat_alg->connected_components().get_component_sets(); printf("number of reheated CC = %lu\n", reheat_cc.size()); ASSERT_EQ(orig_cc.size(), reheat_cc.size()); @@ -136,20 +127,21 @@ TEST_P(CCAlgTest, TestCorrectnessOfReheating) { // Test the multithreaded system by using multiple worker threads TEST_P(CCAlgTest, MultipleWorkers) { auto driver_config = DriverConfiguration().gutter_sys(GetParam()).worker_threads(8); - int num_trials = 5; + int num_trials = 2; while (num_trials--) { size_t seed = get_seed(); - generate_stream(seed, 1024, 0.002, 0.5, 0.5, 3, "sample.txt", "cumul_sample.txt"); + generate_stream(seed, 1024, 0.002, 0.5, 0.2, 3, "sample.txt", "cumul_sample.txt"); AsciiFileStream stream{"./sample.txt"}; node_id_t num_nodes = stream.vertices(); seed = get_seed(); CCSketchAlg cc_alg{num_nodes, seed}; - cc_alg.set_verifier(std::make_unique(1024, "./cumul_sample.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); + cc_alg.connected_components(); } } @@ -163,12 +155,11 @@ TEST_P(CCAlgTest, TestPointQuery) { node_id_t num_nodes = stream.vertices(); CCSketchAlg cc_alg{num_nodes, get_seed()}; - cc_alg.set_verifier( - std::make_unique(1024, curr_dir + 
"/res/multiples_graph_1024.txt")); GraphSketchDriver driver(&cc_alg, &stream, driver_config); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, curr_dir + "/res/multiples_graph_1024.txt")); std::vector> ret = cc_alg.connected_components().get_component_sets(); std::vector ccid(num_nodes); @@ -179,8 +170,6 @@ TEST_P(CCAlgTest, TestPointQuery) { } for (node_id_t i = 0; i < std::min(10u, num_nodes); ++i) { for (node_id_t j = 0; j < std::min(10u, num_nodes); ++j) { - cc_alg.set_verifier( - std::make_unique(1024, curr_dir + "/res/multiples_graph_1024.txt")); ASSERT_EQ(cc_alg.point_query(i, j), ccid[i] == ccid[j]); } } @@ -198,7 +187,7 @@ TEST(CCAlgTest, TestQueryDuringStream) { CCSketchAlg cc_alg{num_nodes, get_seed(), cc_config}; GraphSketchDriver driver(&cc_alg, &stream, driver_config); - MatGraphVerifier verify(num_nodes); + GraphVerifier verify(num_nodes); int type; node_id_t a, b; @@ -209,76 +198,105 @@ TEST(CCAlgTest, TestQueryDuringStream) { for (int j = 0; j < 9; j++) { for (edge_id_t i = 0; i < tenth; i++) { in >> type >> a >> b; - verify.edge_update(a, b); + verify.edge_update({a, b}); } - verify.reset_cc_state(); driver.process_stream_until(tenth * (j + 1)); - driver.prep_query(); - cc_alg.set_verifier(std::make_unique(verify)); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(verify); + cc_alg.connected_components(); } num_edges -= 9 * tenth; while (num_edges--) { in >> type >> a >> b; - verify.edge_update(a, b); + verify.edge_update({a, b}); } - verify.reset_cc_state(); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); - cc_alg.set_verifier(std::make_unique(verify)); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(verify); + cc_alg.connected_components(); } TEST(CCAlgTest, EagerDSUTest) { node_id_t num_nodes = 100; CCSketchAlg cc_alg{num_nodes, get_seed()}; - MatGraphVerifier verify(num_nodes); + GraphVerifier 
verify(num_nodes); // This should be a spanning forest edge cc_alg.update({{1, 2}, INSERT}); - verify.edge_update(1, 2); - verify.reset_cc_state(); + verify.edge_update({1, 2}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); // This should be a spanning forest edge cc_alg.update({{2, 3}, INSERT}); - verify.edge_update(2, 3); - verify.reset_cc_state(); + verify.edge_update({2, 3}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); // This should be an edge within a component cc_alg.update({{1, 3}, INSERT}); - verify.edge_update(1, 3); - verify.reset_cc_state(); + verify.edge_update({1, 3}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); // This should delete an edge within a component cc_alg.update({{1, 3}, DELETE}); - verify.edge_update(1, 3); - verify.reset_cc_state(); + verify.edge_update({1, 3}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); // This one should delete a spanning forest edge and cause a rebuild cc_alg.update({{2, 3}, DELETE}); - verify.edge_update(2, 3); - verify.reset_cc_state(); + verify.edge_update({2, 3}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); // This one should be a normal edge cc_alg.update({{2, 3}, INSERT}); - verify.edge_update(2, 3); - verify.reset_cc_state(); + verify.edge_update({2, 3}); cc_alg.set_verifier(std::make_unique(verify)); cc_alg.connected_components(); } +TEST(CCAlgTest, SpanningForestExtraction) { + auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); + auto cc_config = CCAlgConfiguration(); + generate_stream(get_seed(), 1024, 0.03, 0.5, 0.05, 3, "sample.txt", "cumul_sample.txt"); + AsciiFileStream stream{"./sample.txt"}; + node_id_t num_nodes = stream.vertices(); + + CCSketchAlg cc_alg{num_nodes, get_seed()}; + GraphSketchDriver driver(&cc_alg, &stream, driver_config); + + driver.process_stream_until(END_OF_STREAM); + driver.prep_query(CONNECTIVITY); 
+ driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); + + cc_alg.calc_spanning_forest(); +} + +TEST(CCAlgTest, InsertOnlyStream) { + auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); + auto cc_config = CCAlgConfiguration(); + generate_stream(get_seed(), 1024, 0.1, 0, 0, 1, "sample.txt", "cumul_sample.txt"); + AsciiFileStream stream{"./sample.txt"}; + node_id_t num_nodes = stream.vertices(); + + CCSketchAlg cc_alg{num_nodes, get_seed()}; + GraphSketchDriver driver(&cc_alg, &stream, driver_config); + + driver.process_stream_until(END_OF_STREAM); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(GraphVerifier(1024, "./cumul_sample.txt")); + + cc_alg.connected_components(); + cc_alg.calc_spanning_forest(); +} + TEST(CCAlgTest, MTStreamWithMultipleQueries) { for (int t = 1; t <= 3; t++) { auto driver_config = DriverConfiguration().gutter_sys(STANDALONE); @@ -296,7 +314,7 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { CCSketchAlg cc_alg{num_nodes, get_seed()}; GraphSketchDriver driver(&cc_alg, &stream, driver_config, 4); - MatGraphVerifier verify(num_nodes); + GraphVerifier verify(num_nodes); size_t num_queries = 10; size_t upd_per_query = num_edges / num_queries; @@ -304,14 +322,14 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { for (size_t j = 0; j < upd_per_query; j++) { GraphStreamUpdate upd; verify_stream.get_update_buffer(&upd, 1); - verify.edge_update(upd.edge.src, upd.edge.dst); + verify.edge_update(upd.edge); ASSERT_NE(upd.type, BREAKPOINT); } - verify.reset_cc_state(); - cc_alg.set_verifier(std::make_unique(verify)); driver.process_stream_until(upd_per_query * (i + 1)); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(verify); + cc_alg.connected_components(); } @@ -319,14 +337,14 @@ TEST(CCAlgTest, MTStreamWithMultipleQueries) { while (num_edges--) { GraphStreamUpdate upd; verify_stream.get_update_buffer(&upd, 1); - verify.edge_update(upd.edge.src, upd.edge.dst); + 
verify.edge_update(upd.edge); ASSERT_NE(upd.type, BREAKPOINT); } - verify.reset_cc_state(); - cc_alg.set_verifier(std::make_unique(verify)); driver.process_stream_until(END_OF_STREAM); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); + driver.check_verifier(verify); + cc_alg.connected_components(); } } diff --git a/test/util/file_graph_verifier.cpp b/test/util/file_graph_verifier.cpp deleted file mode 100644 index a212be3c..00000000 --- a/test/util/file_graph_verifier.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include "../../include/test/file_graph_verifier.h" - -#include -#include -#include -#include -#include - -FileGraphVerifier::FileGraphVerifier(node_id_t n, const std::string &input_file) { - std::ifstream in(input_file); - if (!in) { - throw std::invalid_argument("FileGraphVerifier: Could not open: " + input_file); - } - - kruskal_ref = kruskal(input_file); - node_id_t num_nodes; - edge_id_t m; - node_id_t a, b; - in >> num_nodes >> m; - if (num_nodes != n) throw std::invalid_argument("num_nodes != n in FileGraphVerifier"); - - for (unsigned i = 0; i < n; ++i) { - adj_matrix.emplace_back(n - i); - } - while (m--) { - in >> a >> b; - if (a > b) std::swap(a, b); - b = b - a; - adj_matrix[a][b] = !adj_matrix[a][b]; - } - in.close(); -} - -std::vector> FileGraphVerifier::kruskal(const std::string& input_file) { - std::ifstream in(input_file); - node_id_t n; - edge_id_t m; - in >> n >> m; - DisjointSetUnion kruskal_sets(n); - int a,b; - while (m--) { - in >> a >> b; - kruskal_sets.merge(a,b); - } - in.close(); - - std::map> temp; - for (unsigned i = 0; i < n; ++i) { - temp[kruskal_sets.find_root(i)].insert(i); - } - - std::vector> retval; - retval.reserve(temp.size()); - for (const auto& entry : temp) { - retval.push_back(entry.second); - } - return retval; -} - -void FileGraphVerifier::verify_edge(Edge edge) { - if (edge.src > edge.dst) std::swap(edge.src, edge.dst); - if (!adj_matrix[edge.src][edge.dst - edge.src]) { - printf("Got an error on edge (%u, %u): 
edge is not in graph!\n", edge.src, edge.dst); - throw BadEdgeException(); - } -} - -void FileGraphVerifier::verify_soln(std::vector> &retval) { - auto temp {retval}; - std::sort(temp.begin(),temp.end()); - std::sort(kruskal_ref.begin(),kruskal_ref.end()); - if (kruskal_ref != temp) { - std::cout << "Provided CC:" << std::endl; - for (auto cc : temp) { - for (auto v : cc) { - std::cout << " " << v; - } - std::cout << std::endl; - } - - std::cout << "Expected CC:" << std::endl; - for (auto cc : kruskal_ref) { - for (auto v : cc) { - std::cout << " " << v; - } - std::cout << std::endl; - } - - throw IncorrectCCException(); - } - - std::cout << "Solution ok: " << retval.size() << " CCs found." << std::endl; -} diff --git a/test/util/graph_verifier.cpp b/test/util/graph_verifier.cpp new file mode 100644 index 00000000..4d35ec1d --- /dev/null +++ b/test/util/graph_verifier.cpp @@ -0,0 +1,157 @@ +#include "graph_verifier.h" +#include + +#include +#include +#include +#include + +GraphVerifier::GraphVerifier(node_id_t num_vertices) + : num_vertices(num_vertices), kruskal_dsu(num_vertices) { + // initialize adjacency matrix + adj_matrix = std::vector>(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) + adj_matrix[i] = std::vector(num_vertices - i); +} + +GraphVerifier::GraphVerifier(node_id_t num_vertices, const std::string &cumul_file_name) + : num_vertices(num_vertices), kruskal_dsu(num_vertices) { + // initialize adjacency matrix + adj_matrix = std::vector>(num_vertices); + for (node_id_t i = 0; i < num_vertices; ++i) + adj_matrix[i] = std::vector(num_vertices - i); + + // cumulative files do not have update types + AsciiFileStream stream(cumul_file_name, false); + + GraphStreamUpdate stream_upd; + stream.get_update_buffer(&stream_upd, 1); + + node_id_t src = stream_upd.edge.src; + node_id_t dst = stream_upd.edge.dst; + UpdateType type = static_cast(stream_upd.type); + + while (type != BREAKPOINT) { + if (src > dst) + std::swap(src, dst); + dst -= src; + 
adj_matrix[src][dst] = !adj_matrix[src][dst]; + + stream.get_update_buffer(&stream_upd, 1); + src = stream_upd.edge.src; + dst = stream_upd.edge.dst; + type = static_cast(stream_upd.type); + } + + kruskal(); +} + +void GraphVerifier::edge_update(Edge edge) { + auto src = edge.src; + auto dst = edge.dst; + + if (src >= num_vertices || dst >= num_vertices) { + throw BadEdgeException("Source " + std::to_string(src) + " or Destination " + + std::to_string(dst) + " out of bounds!"); + } + if (src > dst) std::swap(src, dst); + + dst = dst - src; + + // update adj_matrix entry + adj_matrix[src][dst] = !adj_matrix[src][dst]; + need_query_compute = true; +} + +void GraphVerifier::kruskal() { + if (!need_query_compute) + return; + + kruskal_ccs = num_vertices; + kruskal_dsu.reset(); + for (node_id_t i = 0; i < num_vertices; i++) { + for (node_id_t j = 0; j < adj_matrix[i].size(); j++) { + if (adj_matrix[i][j] && kruskal_dsu.merge(i, i + j).merged) + kruskal_ccs -= 1; + } + } + need_query_compute = false; +} + +void GraphVerifier::verify_edge(Edge edge) { + // verify that the edge in question actually exists + if (edge.src > edge.dst) std::swap(edge.src, edge.dst); + if (!adj_matrix[edge.src][edge.dst - edge.src]) { + printf("Got an error on edge (%u, %u): edge is not in adj_matrix\n", edge.src, edge.dst); + throw BadEdgeException("The edge is not in the cut of the sample!"); + } +} + +void GraphVerifier::verify_connected_components(const ConnectedComponents &cc) { + // compute the connected components for the verifier + kruskal(); + + // first check that the number of components is the same for both + if (kruskal_ccs != cc.size()) { + throw IncorrectCCException("Incorrect number of components!"); + } + + // then check that we agree on where all the vertices belong + for (node_id_t i = 0; i < num_vertices; i++) { + node_id_t root = kruskal_dsu.find_root(i); + if (!cc.is_connected(root, i)) + throw IncorrectCCException("Incorrect Connectivity!"); + } +} + +void 
GraphVerifier::verify_spanning_forests(std::vector SFs) { + // backup the adjacency matrix + std::vector> backup(adj_matrix); + + for (SpanningForest &forest : SFs) { + kruskal(); + + DisjointSetUnion forest_ccs(num_vertices); + for (auto edge : forest.get_edges()) { + // every edge in the spanning forest must encode connectivity info + if (!forest_ccs.merge(edge.src, edge.dst).merged) { + adj_matrix = backup; + throw IncorrectForestException( + "Found an edge: (" + std::to_string(edge.src) + ", " + + std::to_string(edge.dst) + ") that is redundant within a single spanning forest!"); + } + + try { + verify_edge(edge); + } catch (...) { + adj_matrix = backup; + throw; + } + edge_update(edge); + } + + // root map allows us to translate from the kruskal_dsu's roots to the forest_ccs' roots + std::map root_map; + + for (node_id_t i = 0; i < num_vertices; i++) { + node_id_t kruskal_root = kruskal_dsu.find_root(i); + + if (root_map.count(kruskal_root) == 0) { + root_map[kruskal_root] = forest_ccs.find_root(i); + } + else if (root_map[kruskal_root] != forest_ccs.find_root(i)) { + adj_matrix = backup; + throw IncorrectForestException("Forest does not match expected component sets!"); + } + } + } + adj_matrix = backup; +} + +void GraphVerifier::combine(const GraphVerifier &oth) { + for (size_t i = 0; i < adj_matrix.size(); i++) { + for (size_t j = 0; j < adj_matrix[i].size(); j++) { + adj_matrix[i][j] = adj_matrix[i][j] != oth.adj_matrix[i][j]; + } + } +} diff --git a/test/util/graph_verifier_test.cpp b/test/util/graph_verifier_test.cpp index 85d852c1..878c238f 100644 --- a/test/util/graph_verifier_test.cpp +++ b/test/util/graph_verifier_test.cpp @@ -1,16 +1,37 @@ #include -#include "../../include/test/file_graph_verifier.h" +#include const std::string fname = __FILE__; size_t pos = fname.find_last_of("\\/"); const std::string curr_dir = (std::string::npos == pos) ? 
"" : fname.substr(0,pos); -TEST(DeterministicToolsTestSuite, TestKruskal) { - ASSERT_EQ(78,FileGraphVerifier::kruskal(curr_dir+"/../res/multiples_graph_1024.txt").size()); +constexpr size_t num_primes = 97; +const size_t primes[num_primes] { + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, + 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, + 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, + 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, + 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, + 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509}; + +// finds a factor of x that is less than x. +static node_id_t find_a_factor(node_id_t x) { + for (auto prime : primes) { + if (prime >= x) break; + if (x % prime == 0) return prime; + } + + return 0; +} + +TEST(GraphVerifierTest, TestCorrectNumCC) { + GraphVerifier verifier(1024, curr_dir+"/../res/multiples_graph_1024.txt"); + + ASSERT_EQ(78, verifier.get_num_kruskal_ccs()); } -TEST(DeterministicToolsTestSuite, TestEdgeVerifier) { - FileGraphVerifier verifier(1024, curr_dir+"/../res/multiples_graph_1024.txt"); +TEST(GraphVerifierTest, TestEdgeVerifier) { + GraphVerifier verifier(1024, curr_dir+"/../res/multiples_graph_1024.txt"); // add edges of the form {i,2i} for (node_id_t i = 2; i < 512; ++i) { verifier.verify_edge({i, i*2}); @@ -20,11 +41,54 @@ TEST(DeterministicToolsTestSuite, TestEdgeVerifier) { ASSERT_THROW(verifier.verify_edge({420,69}), BadEdgeException); } -TEST(DeterministicToolsTestSuite, TestCCVerifier) { - FileGraphVerifier verifier (1024, curr_dir+"/../res/multiples_graph_1024.txt"); - // {0}, {1}, and primes \in [521,1021] are CCs - // add edges of the form {i,2i} - for (node_id_t i = 2; i < 512; ++i) { - verifier.verify_edge({i, i*2}); +TEST(GraphVerifierTest, TestVerifySpanningForest) { + GraphVerifier verifier(1024, 
curr_dir+"/../res/multiples_graph_1024.txt"); + + { + // create a partial spanning forest + std::unordered_set adj_list[1024]; + for (node_id_t i = 2; i < 512; i++) { + adj_list[i].insert(i*2); + } + + // This spanning forest should be incorrect, + // it is incomplete + ASSERT_THROW( + verifier.verify_spanning_forests(std::vector{SpanningForest(1024, adj_list)}), + IncorrectForestException + ); + } + { + // create a partial spanning forest + std::unordered_set adj_list[1024]; + for (node_id_t i = 2; i < 512; i++) { + adj_list[i].insert(i*2); + } + adj_list[2].insert(5); // this is the bad edge + for (node_id_t i = 6; i < 1024; i+=2) { + adj_list[2].insert(i); + } + + // This spanning forest should be incorrect, + // it contains an edge not found in the original graph + ASSERT_THROW( + verifier.verify_spanning_forests(std::vector{SpanningForest(1024, adj_list)}), + BadEdgeException + ); + } + { + // This is a correct spanning forest + std::unordered_set adj_list[1024]; + for (node_id_t i = 2; i < 1024; i++) { + node_id_t factor = find_a_factor(i); + if (factor != 0) adj_list[factor].insert(i); + } + for (auto prime : primes) { + if (prime == 2) continue; + adj_list[prime].insert(prime * 2); + } + + SpanningForest forest(1024, adj_list); + verifier.verify_spanning_forests(std::vector{forest}); } } diff --git a/test/util/mat_graph_verifier.cpp b/test/util/mat_graph_verifier.cpp deleted file mode 100644 index d313d736..00000000 --- a/test/util/mat_graph_verifier.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include "../../include/test/mat_graph_verifier.h" - -#include -#include -#include -#include - -MatGraphVerifier::MatGraphVerifier(node_id_t n) : n(n) { - adj_matrix = std::vector>(n); - for (node_id_t i = 0; i < n; ++i) - adj_matrix[i] = std::vector(n - i); -} - -void MatGraphVerifier::edge_update(node_id_t src, node_id_t dst) { - if (src > dst) std::swap(src, dst); - - dst = dst - src; - - // update adj_matrix entry - adj_matrix[src][dst] = !adj_matrix[src][dst]; -} - - 
-void MatGraphVerifier::reset_cc_state() { - kruskal_ref = kruskal(); -} - -std::vector> MatGraphVerifier::kruskal() { - DisjointSetUnion kruskal_dsu(n); - - for (node_id_t i = 0; i < n; i++) { - for (node_id_t j = 0; j < adj_matrix[i].size(); j++) { - if (adj_matrix[i][j]) kruskal_dsu.merge(i, i + j); - } - } - - std::map> temp; - for (unsigned i = 0; i < n; ++i) { - temp[kruskal_dsu.find_root(i)].insert(i); - } - - std::vector> retval; - retval.reserve(temp.size()); - for (const auto& entry : temp) { - retval.push_back(entry.second); - } - return retval; -} - -void MatGraphVerifier::verify_edge(Edge edge) { - // verify that the edge in question actually exists - if (edge.src > edge.dst) std::swap(edge.src, edge.dst); - if (!adj_matrix[edge.src][edge.dst - edge.src]) { - printf("Got an error on edge (%u, %u): edge is not in adj_matrix\n", edge.src, edge.dst); - throw BadEdgeException(); - } -} - -void MatGraphVerifier::verify_soln(std::vector> &retval) { - auto temp {retval}; - std::sort(temp.begin(),temp.end()); - std::sort(kruskal_ref.begin(),kruskal_ref.end()); - if (kruskal_ref != temp) - throw IncorrectCCException(); - - std::cout << "Solution ok: " << retval.size() << " CCs found." 
<< std::endl; -} diff --git a/tools/process_stream.cpp b/tools/process_stream.cpp index 3f9127f7..53c49586 100644 --- a/tools/process_stream.cpp +++ b/tools/process_stream.cpp @@ -95,7 +95,7 @@ int main(int argc, char **argv) { driver.process_stream_until(END_OF_STREAM); auto cc_start = std::chrono::steady_clock::now(); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); auto CC_num = cc_alg.connected_components().size(); std::chrono::duration cc_time = std::chrono::steady_clock::now() - cc_start; std::chrono::duration insert_time = driver.flush_end - ins_start; @@ -116,7 +116,7 @@ int main(int argc, char **argv) { cc_start = std::chrono::steady_clock::now(); - driver.prep_query(); + driver.prep_query(CONNECTIVITY); CC_num = cc_alg.connected_components().size(); cc_time = std::chrono::steady_clock::now() - cc_start; insert_time = driver.flush_end - ins_start; diff --git a/tools/test_correctness.cpp b/tools/test_correctness.cpp index 36a6322f..48739ea3 100644 --- a/tools/test_correctness.cpp +++ b/tools/test_correctness.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include static size_t get_seed() { auto now = std::chrono::high_resolution_clock::now(); @@ -38,14 +38,13 @@ CorrectnessResults test_path_correctness(size_t num_vertices, size_t num_graphs, size_t edge_seed = gen(); std::vector copy_vertices(vertices); std::shuffle(copy_vertices.begin(), copy_vertices.end(), std::mt19937_64(edge_seed)); - MatGraphVerifier verifier(num_vertices); + GraphVerifier verifier(num_vertices); node_id_t cur_node = copy_vertices[0]; for (size_t i = 1; i < num_vertices; i++) { - verifier.edge_update(cur_node, copy_vertices[i]); + verifier.edge_update({cur_node, copy_vertices[i]}); cur_node = copy_vertices[i]; } - verifier.reset_cc_state(); for (size_t s = 0; s < samples_per_graph; s++) { CCSketchAlg cc_alg(num_vertices, get_seed()); @@ -55,7 +54,7 @@ CorrectnessResults test_path_correctness(size_t num_vertices, size_t num_graphs, cc_alg.update({{cur_node, 
copy_vertices[i]}, INSERT}); cur_node = copy_vertices[i]; } - cc_alg.set_verifier(std::make_unique(verifier)); + cc_alg.set_verifier(std::make_unique(verifier)); std::cout << "graph: " << g << " sample: " << s << " "; try {