diff --git a/include/l0_sampling/sketch.h b/include/l0_sampling/sketch.h index 8746a5b4..16f61f48 100644 --- a/include/l0_sampling/sketch.h +++ b/include/l0_sampling/sketch.h @@ -94,7 +94,7 @@ class Sketch { failure_factor = _factor; num_buckets = bucket_gen(failure_factor); num_guesses = guess_gen(n); - num_elems = num_buckets * num_guesses + 2; // +2 for zero depth bucket and null bucket + num_elems = num_buckets * num_guesses + 1; // +1 for zero bucket optimization } inline static size_t sketchSizeof() { diff --git a/src/l0_sampling/sketch.cpp b/src/l0_sampling/sketch.cpp index 7d5c22d5..8da5314e 100644 --- a/src/l0_sampling/sketch.cpp +++ b/src/l0_sampling/sketch.cpp @@ -64,9 +64,8 @@ void Sketch::update(const vec_t update_idx) { for (unsigned i = 0; i < num_buckets; ++i) { col_hash_t depth = Bucket_Boruvka::get_index_depth(update_idx, seed + i, num_guesses); size_t bucket_id = i * num_guesses + depth; - bucket_id *= (bool)(depth!=0); // if depth is 0 then "update" null bucket -> bucket[0] - - Bucket_Boruvka::update(bucket_a[bucket_id], bucket_c[bucket_id], update_idx, checksum); + if (depth < num_guesses) + Bucket_Boruvka::update(bucket_a[bucket_id], bucket_c[bucket_id], update_idx, checksum); } } @@ -83,14 +82,13 @@ std::pair Sketch::query() { already_queried = true; if (bucket_a[num_elems - 1] == 0 && bucket_c[num_elems - 1] == 0) { - return {0, ZERO}; // the "first" bucket is deterministic so if it is all zero then there are no edges to return + return {0, ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return } if (Bucket_Boruvka::is_good(bucket_a[num_elems - 1], bucket_c[num_elems - 1], seed)) { return {bucket_a[num_elems - 1], GOOD}; } for (unsigned i = 0; i < num_buckets; ++i) { - // bucket[0] is null - for (unsigned j = begin_nonnull; j < num_guesses; ++j) { + for (unsigned j = 0; j < num_guesses; ++j) { unsigned bucket_id = i * num_guesses + j; if (Bucket_Boruvka::is_good(bucket_a[bucket_id], bucket_c[bucket_id], seed)) { 
return {bucket_a[bucket_id], GOOD}; @@ -102,7 +100,7 @@ std::pair Sketch::query() { Sketch &operator+= (Sketch &sketch1, const Sketch &sketch2) { assert (sketch1.seed == sketch2.seed); - for (unsigned i = Sketch::begin_nonnull; i < Sketch::num_elems; i++) { + for (unsigned i = 0; i < Sketch::num_elems; i++) { sketch1.bucket_a[i] ^= sketch2.bucket_a[i]; sketch1.bucket_c[i] ^= sketch2.bucket_c[i]; } @@ -114,11 +112,11 @@ bool operator== (const Sketch &sketch1, const Sketch &sketch2) { if (sketch1.seed != sketch2.seed || sketch1.already_queried != sketch2.already_queried) return false; - for (size_t i = Sketch::begin_nonnull; i < Sketch::num_elems; ++i) { + for (size_t i = 0; i < Sketch::num_elems; ++i) { if (sketch1.bucket_a[i] != sketch2.bucket_a[i]) return false; } - for (size_t i = Sketch::begin_nonnull; i < Sketch::num_elems; ++i) { + for (size_t i = 0; i < Sketch::num_elems; ++i) { if (sketch1.bucket_c[i] != sketch2.bucket_c[i]) return false; } @@ -133,7 +131,7 @@ std::ostream& operator<< (std::ostream &os, const Sketch &sketch) { os << " a:" << a << " c:" << c << (good ? " good" : " bad") << std::endl; for (unsigned i = 0; i < Sketch::num_buckets; ++i) { - for (unsigned j = Sketch::begin_nonnull; j < Sketch::num_guesses; ++j) { + for (unsigned j = 0; j < Sketch::num_guesses; ++j) { unsigned bucket_id = i * Sketch::num_guesses + j; vec_t a = sketch.bucket_a[bucket_id]; vec_hash_t c = sketch.bucket_c[bucket_id]; @@ -152,7 +150,6 @@ void Sketch::write_binary(std::ostream& binary_out) { void Sketch::write_binary(std::ostream &binary_out) const { // Write out the bucket values to the stream. 
- // Do not include the null bucket binary_out.write((char*)bucket_a, num_elems * sizeof(vec_t)); binary_out.write((char*)bucket_c, num_elems * sizeof(vec_hash_t)); } diff --git a/tools/benchmark/graphcc_bench.cpp b/tools/benchmark/graphcc_bench.cpp index e8f901e8..44ab9811 100644 --- a/tools/benchmark/graphcc_bench.cpp +++ b/tools/benchmark/graphcc_bench.cpp @@ -103,7 +103,7 @@ static void BM_builtin_ffsl(benchmark::State& state) { size_t j = -1; for (auto _ : state) { benchmark::DoNotOptimize(__builtin_ffsl(i++)); - benchmark::DoNotOptimize(__builtin_ffsl(j++)); + benchmark::DoNotOptimize(__builtin_ffsl(j--)); } } BENCHMARK(BM_builtin_ffsl); @@ -113,7 +113,7 @@ static void BM_builtin_ctzl(benchmark::State& state) { size_t j = -1; for (auto _ : state) { benchmark::DoNotOptimize(__builtin_ctzl(i++)); - benchmark::DoNotOptimize(__builtin_ctzl(j++)); + benchmark::DoNotOptimize(__builtin_ctzl(j--)); } } BENCHMARK(BM_builtin_ctzl); @@ -123,7 +123,7 @@ static void BM_builtin_clzl(benchmark::State& state) { size_t j = -1; for (auto _ : state) { benchmark::DoNotOptimize(__builtin_clzl(i++)); - benchmark::DoNotOptimize(__builtin_clzl(j++)); + benchmark::DoNotOptimize(__builtin_clzl(j--)); } } BENCHMARK(BM_builtin_clzl); @@ -190,7 +190,7 @@ BENCHMARK(BM_update_bucket); // Benchmark the speed of updating sketches both serially and in batch mode static void BM_Sketch_Update(benchmark::State& state) { size_t vec_size = state.range(0); - vec_t input = vec_size / 4; + vec_t input = vec_size / 3; // initialize sketches Sketch::configure(vec_size, 100); SketchUniquePtr skt = makeSketch(seed); @@ -239,6 +239,27 @@ static void BM_Sketch_Query(benchmark::State& state) { } BENCHMARK(BM_Sketch_Query)->DenseRange(0, 90, 10); +static void BM_Supernode_Merge(benchmark::State& state) { + size_t n = state.range(0); + size_t upds = n / 100; + Supernode::configure(n); + Supernode* s1 = Supernode::makeSupernode(n, seed); + Supernode* s2 = Supernode::makeSupernode(n, seed); + + for (size_t i = 
0; i < upds; i++) { + s1->update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n))); + s2->update(static_cast<vec_t>(concat_pairing_fn(rand() % n, rand() % n))); + } + + for (auto _ : state) { + s1->merge(*s2); + } + + free(s1); + free(s2); +} +BENCHMARK(BM_Supernode_Merge)->RangeMultiplier(10)->Range(1e3, 1e6); + // Benchmark speed of DSU merges when the sequence of merges is adversarial // This means we avoid joining roots wherever possible static void BM_DSU_Adversarial(benchmark::State &state) {