Skip to content

Commit bd8cddd

Browse files
authored
Merge pull request #126 from GraphStreamingProject/range_merge
Supernode: range merge function
2 parents 26b65e0 + 9436ddd commit bd8cddd

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

include/supernode.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class Supernode {
3737

3838
private:
3939
size_t num_sketches;
40+
size_t merged_sketches; // Exclusive upper bound on the sketch indices that are valid to query after a merge
4041
size_t sketch_size;
4142

4243
/* collection of logn sketches to query from, since we can't query from one
@@ -112,10 +113,10 @@ class Supernode {
112113
static int get_max_sketches() { return max_sketches; };
113114

114115
// get number of samples remaining in the Supernode
115-
int samples_remaining() { return num_sketches - sample_idx; }
116+
int samples_remaining() { return merged_sketches - sample_idx; }
116117

117118
inline bool out_of_queries() {
118-
return sample_idx >= num_sketches;
119+
return sample_idx >= merged_sketches;
119120
}
120121

121122
inline int curr_idx() {
@@ -158,6 +159,16 @@ class Supernode {
158159
*/
159160
void merge(Supernode& other);
160161

162+
/**
163+
* In-place range merge function. Updates the caller Supernode.
164+
* The range merge only merges some of the Sketches
165+
* The requested range is not validated, so this function should only be used if you know the range is correct for both Supernodes
166+
* @param other Supernode to merge into caller
167+
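* @param start_idx Index of first Sketch to merge (NOTE(review): the implementation begins merging at sample_idx and uses start_idx only to compute the end of the range; confirm this is intended)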
168+
* @param num_merge How many sketches to merge
169+
*/
170+
void range_merge(Supernode& other, size_t start_idx, size_t num_merge);
171+
161172
/**
162173
* Insert or delete an (encoded) edge into the supernode. Guaranteed to be
163174
* processed BEFORE Boruvka starts.

src/supernode.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ size_t Supernode::bytes_size;
88
size_t Supernode::serialized_size;
99

1010
Supernode::Supernode(uint64_t n, uint64_t seed): sample_idx(0),
11-
n(n), seed(seed), num_sketches(max_sketches), sketch_size(Sketch::sketchSizeof()) {
11+
n(n), seed(seed), num_sketches(max_sketches),
12+
merged_sketches(max_sketches), sketch_size(Sketch::sketchSizeof()) {
1213

1314
size_t sketch_width = Sketch::column_gen(Sketch::get_failure_factor());
1415
// generate num_sketches sketches for each supernode (read: node)
@@ -40,6 +41,7 @@ Supernode::Supernode(uint64_t n, uint64_t seed, std::istream &binary_in) :
4041
}
4142
// sample in range [beg, beg + num)
4243
num_sketches = beg + num;
44+
merged_sketches = num_sketches;
4345
sample_idx = beg;
4446

4547
// create empty sketches, if any
@@ -59,8 +61,9 @@ Supernode::Supernode(uint64_t n, uint64_t seed, std::istream &binary_in) :
5961
}
6062
}
6163

62-
Supernode::Supernode(const Supernode& s) : sample_idx(s.sample_idx), n(s.n),
63-
seed(s.seed), num_sketches(s.num_sketches), sketch_size(s.sketch_size) {
64+
Supernode::Supernode(const Supernode& s) :
65+
sample_idx(s.sample_idx), n(s.n), seed(s.seed), num_sketches(s.num_sketches),
66+
merged_sketches(s.merged_sketches), sketch_size(s.sketch_size) {
6467
for (size_t i = 0; i < num_sketches; ++i) {
6568
Sketch::makeSketch(get_sketch(i), *s.get_sketch(i));
6669
}
@@ -82,7 +85,7 @@ Supernode::~Supernode() {
8285
}
8386

8487
std::pair<Edge, SampleSketchRet> Supernode::sample() {
85-
if (sample_idx == num_sketches) throw OutOfQueriesException();
88+
if (out_of_queries()) throw OutOfQueriesException();
8689

8790
std::pair<vec_t, SampleSketchRet> query_ret = get_sketch(sample_idx++)->query();
8891
vec_t non_zero = query_ret.first;
@@ -91,7 +94,7 @@ std::pair<Edge, SampleSketchRet> Supernode::sample() {
9194
}
9295

9396
std::pair<std::vector<Edge>, SampleSketchRet> Supernode::exhaustive_sample() {
94-
if (sample_idx == num_sketches) throw OutOfQueriesException();
97+
if (out_of_queries()) throw OutOfQueriesException();
9598

9699
std::pair<std::vector<vec_t>, SampleSketchRet> query_ret = get_sketch(sample_idx++)->exhaustive_query();
97100
std::vector<Edge> edges(query_ret.first.size());
@@ -104,10 +107,18 @@ std::pair<std::vector<Edge>, SampleSketchRet> Supernode::exhaustive_sample() {
104107

105108
void Supernode::merge(Supernode &other) {
106109
sample_idx = std::max(sample_idx, other.sample_idx);
107-
num_sketches = std::min(num_sketches, other.num_sketches);
108-
for (size_t i=sample_idx;i<num_sketches;++i) {
110+
merged_sketches = std::min(merged_sketches, other.merged_sketches);
111+
for (size_t i = sample_idx; i < merged_sketches; ++i)
112+
(*get_sketch(i))+=(*other.get_sketch(i));
113+
}
114+
115+
void Supernode::range_merge(Supernode& other, size_t start_idx, size_t num_merge) {
116+
sample_idx = std::max(sample_idx, other.sample_idx);
117+
// we trust the caller so whatever they tell us goes here
118+
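// hopefully if the caller is incorrect then this will be caught by out_of_queries(); note that it only checks sample_idx >= merged_sketches, so a range extending past num_sketches is not detected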
119+
merged_sketches = start_idx + num_merge;
120+
for (size_t i = sample_idx; i < merged_sketches; i++)
109121
(*get_sketch(i))+=(*other.get_sketch(i));
110-
}
111122
}
112123

113124
void Supernode::update(vec_t upd) {

0 commit comments

Comments
 (0)