Skip to content

Commit b631c1e

Browse files
authored
Merge pull request #169 from alamb/alamb/hash_reuse
Reuse hashes buffer when emitting partial join results
2 parents cdd7f12 + 302c223 commit b631c1e

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

datafusion/physical-plan/src/joins/hash_join.rs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -669,6 +669,7 @@ impl ExecutionPlan for HashJoinExec {
669669
state: HashJoinStreamState::WaitBuildSide,
670670
build_side: BuildSide::Initial(BuildSideInitialState { left_fut }),
671671
batch_size,
672+
hashes_buffer: vec![],
672673
}))
673674
}
674675

@@ -979,6 +980,8 @@ struct HashJoinStream {
979980
build_side: BuildSide,
980981
/// Maximum output batch size
981982
batch_size: usize,
983+
/// Scratch space for computing hashes
984+
hashes_buffer: Vec<u64>,
982985
}
983986

984987
impl RecordBatchStream for HashJoinStream {
@@ -1044,6 +1047,7 @@ fn lookup_join_hashmap<T: JoinHashMapType>(
10441047
probe_on: &[Column],
10451048
random_state: &RandomState,
10461049
null_equals_null: bool,
1050+
hashes_buffer: &mut Vec<u64>,
10471051
limit: usize,
10481052
offset: JoinHashMapOffset,
10491053
) -> Result<(UInt64Array, UInt32Array, Option<JoinHashMapOffset>)> {
@@ -1059,8 +1063,9 @@ fn lookup_join_hashmap<T: JoinHashMapType>(
10591063
})
10601064
.collect::<Result<Vec<_>>>()?;
10611065

1062-
let mut hashes_buffer = vec![0; probe_batch.num_rows()];
1063-
let hash_values = create_hashes(&keys_values, random_state, &mut hashes_buffer)?;
1066+
hashes_buffer.clear();
1067+
hashes_buffer.resize(probe_batch.num_rows(), 0);
1068+
let hash_values = create_hashes(&keys_values, random_state, hashes_buffer)?;
10641069

10651070
let (mut probe_builder, mut build_builder, next_offset) = build_hashmap
10661071
.get_matched_indices_with_limit_offset(
@@ -1263,6 +1268,7 @@ impl HashJoinStream {
12631268
&self.on_right,
12641269
&self.random_state,
12651270
self.null_equals_null,
1271+
&mut self.hashes_buffer,
12661272
self.batch_size,
12671273
state.offset,
12681274
)?;
@@ -2930,6 +2936,7 @@ mod tests {
29302936
);
29312937

29322938
let join_hash_map = JoinHashMap::new(hashmap_left, next);
2939+
let mut hashes_buffer = vec![0];
29332940

29342941
let (l, r, _) = lookup_join_hashmap(
29352942
&join_hash_map,
@@ -2939,6 +2946,7 @@ mod tests {
29392946
&[Column::new("a", 0)],
29402947
&random_state,
29412948
false,
2949+
&mut hashes_buffer,
29422950
8192,
29432951
(0, None),
29442952
)?;

0 commit comments

Comments (0)