From 9dc358c53c2ace1af818fb754e2c00e66b9a2a30 Mon Sep 17 00:00:00 2001 From: Evan West Date: Tue, 21 Mar 2023 11:46:03 -0400 Subject: [PATCH 1/2] range merge function --- include/supernode.h | 14 ++++++++++++-- src/supernode.cpp | 21 +++++++++++++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/include/supernode.h b/include/supernode.h index 8144c8e1..49d71461 100644 --- a/include/supernode.h +++ b/include/supernode.h @@ -37,6 +37,7 @@ class Supernode { private: size_t num_sketches; + size_t merged_sketches; // This variable tells us which sketches are good for queries post merge size_t sketch_size; /* collection of logn sketches to query from, since we can't query from one @@ -112,10 +113,10 @@ class Supernode { static int get_max_sketches() { return max_sketches; }; // get number of samples remaining in the Supernode - int samples_remaining() { return num_sketches - sample_idx; } + int samples_remaining() { return merged_sketches - sample_idx; } inline bool out_of_queries() { - return sample_idx >= num_sketches; + return merged_sketches >= num_sketches; } inline int curr_idx() { @@ -158,6 +159,15 @@ class Supernode { */ void merge(Supernode& other); + /** + * In-place range merge function. Updates the caller Supernode. + * The range merge only merges some of the Sketches + * @param other Supernode to merge into caller + * @param start_idx Index of first Sketch to merge + * @param num_merge How many sketches to merge + */ + void range_merge(Supernode& other, size_t start_idx, size_t num_merge); + /** * Insert or delete an (encoded) edge into the supernode. Guaranteed to be * processed BEFORE Boruvka starts. diff --git a/src/supernode.cpp b/src/supernode.cpp index c91d5bcc..87bcdbe2 100644 --- a/src/supernode.cpp +++ b/src/supernode.cpp @@ -8,7 +8,8 @@ size_t Supernode::bytes_size; size_t Supernode::serialized_size; Supernode::Supernode(uint64_t n, uint64_t seed): sample_idx(0), - n(n), seed(seed), num_sketches(max_sketches), sketch_size(Sketch::sketchSizeof()) { + n(n), seed(seed), num_sketches(max_sketches), + merged_sketches(max_sketches), sketch_size(Sketch::sketchSizeof()) { size_t sketch_width = Sketch::column_gen(Sketch::get_failure_factor()); // generate num_sketches sketches for each supernode (read: node) @@ -40,6 +41,7 @@ Supernode::Supernode(uint64_t n, uint64_t seed, std::istream &binary_in) : } // sample in range [beg, beg + num) num_sketches = beg + num; + merged_sketches = num_sketches; sample_idx = beg; // create empty sketches, if any @@ -59,8 +61,9 @@ Supernode::Supernode(uint64_t n, uint64_t seed, std::istream &binary_in) : } } -Supernode::Supernode(const Supernode& s) : sample_idx(s.sample_idx), n(s.n), - seed(s.seed), num_sketches(s.num_sketches), sketch_size(s.sketch_size) { +Supernode::Supernode(const Supernode& s) : + sample_idx(s.sample_idx), n(s.n), seed(s.seed), num_sketches(s.num_sketches), + merged_sketches(s.merged_sketches), sketch_size(s.sketch_size) { for (size_t i = 0; i < num_sketches; ++i) { Sketch::makeSketch(get_sketch(i), *s.get_sketch(i)); } @@ -104,10 +107,16 @@ std::pair, SampleSketchRet> Supernode::exhaustive_sample() { void Supernode::merge(Supernode &other) { sample_idx = std::max(sample_idx, other.sample_idx); - num_sketches = std::min(num_sketches, other.num_sketches); - for (size_t i=sample_idx;i Date: Tue, 21 Mar 2023 14:56:22 -0400 Subject: [PATCH 2/2] improvements to range merge --- include/supernode.h | 3 ++- src/supernode.cpp | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/supernode.h b/include/supernode.h index 49d71461..788bf4ac 100644 --- a/include/supernode.h +++ b/include/supernode.h @@ -116,7 +116,7 @@ class Supernode { int samples_remaining() { return merged_sketches - sample_idx; } inline bool out_of_queries() { - return merged_sketches >= num_sketches; + return sample_idx >= merged_sketches; } inline int curr_idx() { @@ -162,6 +162,7 @@ class Supernode { /** * In-place range merge function. Updates the caller Supernode. * The range merge only merges some of the Sketches + * This function should only be used if you know what you're doing * @param other Supernode to merge into caller * @param start_idx Index of first Sketch to merge * @param num_merge How many sketches to merge diff --git a/src/supernode.cpp b/src/supernode.cpp index 87bcdbe2..d9f45057 100644 --- a/src/supernode.cpp +++ b/src/supernode.cpp @@ -85,7 +85,7 @@ Supernode::~Supernode() { } std::pair Supernode::sample() { - if (sample_idx == num_sketches) throw OutOfQueriesException(); + if (out_of_queries()) throw OutOfQueriesException(); std::pair query_ret = get_sketch(sample_idx++)->query(); vec_t non_zero = query_ret.first; @@ -94,7 +94,7 @@ std::pair Supernode::sample() { } std::pair, SampleSketchRet> Supernode::exhaustive_sample() { - if (sample_idx == num_sketches) throw OutOfQueriesException(); + if (out_of_queries()) throw OutOfQueriesException(); std::pair, SampleSketchRet> query_ret = get_sketch(sample_idx++)->exhaustive_query(); std::vector edges(query_ret.first.size()); @@ -114,7 +114,9 @@ void Supernode::merge(Supernode &other) { void Supernode::range_merge(Supernode& other, size_t start_idx, size_t num_merge) { sample_idx = std::max(sample_idx, other.sample_idx); - merged_sketches = std::min(merged_sketches, start_idx + num_merge); + // we trust the caller so whatever they tell us goes here + // hopefully if the caller is incorrect then this will be caught by out_of_queries() + merged_sketches = start_idx + num_merge; for (size_t i = sample_idx; i < merged_sketches; i++) (*get_sketch(i))+=(*other.get_sketch(i)); }