Skip to content

Commit 6ecb6cd

Browse files
Preserve all of the valid orderings during merging. (#8169)
* Preserve all of the valid orderings during merging. * Update datafusion/physical-expr/src/equivalence.rs Co-authored-by: Mehmet Ozan Kabak <ozankabak@gmail.com> * Address reviews --------- Co-authored-by: Mehmet Ozan Kabak <ozankabak@gmail.com>
1 parent abb2ae7 commit 6ecb6cd

5 files changed

Lines changed: 335 additions & 23 deletions

File tree

datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs

Lines changed: 263 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,272 @@
1717

1818
#[cfg(test)]
1919
mod sp_repartition_fuzz_tests {
20-
use arrow::compute::concat_batches;
21-
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
22-
use arrow_schema::SortOptions;
23-
use datafusion::physical_plan::memory::MemoryExec;
24-
use datafusion::physical_plan::repartition::RepartitionExec;
25-
use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
26-
use datafusion::physical_plan::{collect, ExecutionPlan, Partitioning};
27-
use datafusion::prelude::SessionContext;
28-
use datafusion_execution::config::SessionConfig;
29-
use datafusion_physical_expr::expressions::col;
30-
use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
31-
use rand::rngs::StdRng;
32-
use rand::{Rng, SeedableRng};
3320
use std::sync::Arc;
21+
22+
use arrow::compute::{concat_batches, lexsort, SortColumn};
23+
use arrow_array::{ArrayRef, Int64Array, RecordBatch, UInt64Array};
24+
use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions};
25+
26+
use datafusion::physical_plan::{
27+
collect,
28+
memory::MemoryExec,
29+
metrics::{BaselineMetrics, ExecutionPlanMetricsSet},
30+
repartition::RepartitionExec,
31+
sorts::sort_preserving_merge::SortPreservingMergeExec,
32+
sorts::streaming_merge::streaming_merge,
33+
stream::RecordBatchStreamAdapter,
34+
ExecutionPlan, Partitioning,
35+
};
36+
use datafusion::prelude::SessionContext;
37+
use datafusion_common::Result;
38+
use datafusion_execution::{
39+
config::SessionConfig, memory_pool::MemoryConsumer, SendableRecordBatchStream,
40+
};
41+
use datafusion_physical_expr::{
42+
expressions::{col, Column},
43+
EquivalenceProperties, PhysicalExpr, PhysicalSortExpr,
44+
};
3445
use test_utils::add_empty_batches;
3546

47+
use itertools::izip;
48+
use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng};
49+
50+
// Builds the base schema used by the helpers and tests below: six columns
// named a, b, c, d, e, f, each declared as a nullable `Int32` field.
// There is no failure path in the body; the function always returns `Ok`.
51+
fn create_test_schema() -> Result<SchemaRef> {
52+
let a = Field::new("a", DataType::Int32, true);
53+
let b = Field::new("b", DataType::Int32, true);
54+
let c = Field::new("c", DataType::Int32, true);
55+
let d = Field::new("d", DataType::Int32, true);
56+
let e = Field::new("e", DataType::Int32, true);
57+
let f = Field::new("f", DataType::Int32, true);
58+
let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f]));
59+
60+
Ok(schema)
61+
}
62+
63+
/// Constructs the test schema together with equivalence properties that
64+
/// contain randomly chosen ascending orderings among columns a, b, c, d,
65+
/// where
66+
/// Column [a=f] (i.e. they are aliases).
67+
/// Column e is constant.
///
/// `seed` seeds the random number generator, so the same seed always
/// yields the same set of orderings.
68+
fn create_random_schema(seed: u64) -> Result<(SchemaRef, EquivalenceProperties)> {
69+
let test_schema = create_test_schema()?;
70+
let col_a = &col("a", &test_schema)?;
71+
let col_b = &col("b", &test_schema)?;
72+
let col_c = &col("c", &test_schema)?;
73+
let col_d = &col("d", &test_schema)?;
74+
let col_e = &col("e", &test_schema)?;
75+
let col_f = &col("f", &test_schema)?;
76+
let col_exprs = [col_a, col_b, col_c, col_d, col_e, col_f];
77+
78+
let mut eq_properties = EquivalenceProperties::new(test_schema.clone());
79+
// Declare that columns a and f are aliases of each other.
80+
eq_properties.add_equal_conditions(col_a, col_f);
81+
// Column e has a constant value.
82+
eq_properties = eq_properties.add_constants([col_e.clone()]);
83+
84+
// Randomly order columns for sorting
85+
let mut rng = StdRng::seed_from_u64(seed);
86+
let mut remaining_exprs = col_exprs[0..4].to_vec(); // only a, b, c, d are sorted
87+
88+
// Every generated sort expression is ascending with nulls last.
let options_asc = SortOptions {
89+
descending: false,
90+
nulls_first: false,
91+
};
92+
93+
// Each iteration shuffles the columns that are still unused and drains a
// random-length prefix of them into one new ordering. Drained columns are
// removed from `remaining_exprs`, so a column ends up in at most one
// ordering. The prefix length may be 0, in which case an empty ordering
// is passed to `add_new_orderings` and the loop simply tries again.
while !remaining_exprs.is_empty() {
94+
let n_sort_expr = rng.gen_range(0..remaining_exprs.len() + 1);
95+
remaining_exprs.shuffle(&mut rng);
96+
97+
let ordering = remaining_exprs
98+
.drain(0..n_sort_expr)
99+
.map(|expr| PhysicalSortExpr {
100+
expr: expr.clone(),
101+
options: options_asc,
102+
})
103+
.collect();
104+
105+
eq_properties.add_new_orderings([ordering]);
106+
}
107+
108+
Ok((test_schema, eq_properties))
109+
}
110+
111+
// Looks up an array that has already been generated for any member of the
112+
// given equivalence class, so that the other expressions in the same
113+
// equivalence class can reuse it. Returns the first such array found in
// `existing_vec` (indexed by column position in `schema`), or `None` when
// no member of the class has an array yet.
//
// Panics if a member of `eq_group` is not a `Column`, or if its name is not
// present in `schema` (both lookups are unwrapped).
114+
fn get_representative_arr(
115+
eq_group: &[Arc<dyn PhysicalExpr>],
116+
existing_vec: &[Option<ArrayRef>],
117+
schema: SchemaRef,
118+
) -> Option<ArrayRef> {
119+
for expr in eq_group.iter() {
120+
let col = expr.as_any().downcast_ref::<Column>().unwrap();
121+
let (idx, _field) = schema.column_with_name(col.name()).unwrap();
122+
if let Some(res) = &existing_vec[idx] {
123+
return Some(res.clone());
124+
}
125+
}
126+
None
127+
}
128+
129+
// Generate a table that satisfies the given equivalence properties; i.e.
130+
// equivalences, ordering equivalences, and constants.
//
// `n_elem` is the number of rows; random values are drawn from
// `0..n_distinct`. The random number generator uses a fixed seed (23), so
// the output is deterministic for a given `eq_properties`.
//
// Columns are filled in this order: constants, then ordering equivalences,
// then equivalence groups; any column still unset afterwards gets random
// values. Panics if any referenced expression is not a `Column` or is not
// found in the schema.
131+
fn generate_table_for_eq_properties(
132+
eq_properties: &EquivalenceProperties,
133+
n_elem: usize,
134+
n_distinct: usize,
135+
) -> Result<RecordBatch> {
136+
let mut rng = StdRng::seed_from_u64(23);
137+
138+
let schema = eq_properties.schema();
139+
// One slot per schema column; `None` means "not generated yet".
let mut schema_vec = vec![None; schema.fields.len()];
140+
141+
// Utility closure to generate a random `UInt64Array` of `num_elems`
// values, each drawn from `0..max_val`.
142+
let mut generate_random_array = |num_elems: usize, max_val: usize| -> ArrayRef {
143+
let values: Vec<u64> = (0..num_elems)
144+
.map(|_| rng.gen_range(0..max_val) as u64)
145+
.collect();
146+
Arc::new(UInt64Array::from_iter_values(values))
147+
};
148+
149+
// Fill constant columns (every row is 0)
150+
for constant in eq_properties.constants() {
151+
let col = constant.as_any().downcast_ref::<Column>().unwrap();
152+
let (idx, _field) = schema.column_with_name(col.name()).unwrap();
153+
let arr =
154+
Arc::new(UInt64Array::from_iter_values(vec![0; n_elem])) as ArrayRef;
155+
schema_vec[idx] = Some(arr);
156+
}
157+
158+
// Fill columns based on ordering equivalences: for each ordering, generate
// a random array per sort expression and lexicographically sort them
// together, so the resulting columns satisfy that ordering.
// NOTE(review): a column appearing in more than one ordering would be
// overwritten by the later one -- confirm callers never produce that.
159+
for ordering in eq_properties.oeq_class().iter() {
160+
let (sort_columns, indices): (Vec<_>, Vec<_>) = ordering
161+
.iter()
162+
.map(|PhysicalSortExpr { expr, options }| {
163+
let col = expr.as_any().downcast_ref::<Column>().unwrap();
164+
let (idx, _field) = schema.column_with_name(col.name()).unwrap();
165+
let arr = generate_random_array(n_elem, n_distinct);
166+
(
167+
SortColumn {
168+
values: arr,
169+
options: Some(*options),
170+
},
171+
idx,
172+
)
173+
})
174+
.unzip();
175+
176+
let sort_arrs = arrow::compute::lexsort(&sort_columns, None)?;
177+
for (idx, arr) in izip!(indices, sort_arrs) {
178+
schema_vec[idx] = Some(arr);
179+
}
180+
}
181+
182+
// Fill columns based on equivalence groups: every member of a group gets
// the same array, reusing one generated above when available.
183+
for eq_group in eq_properties.eq_group().iter() {
184+
let representative_array =
185+
get_representative_arr(eq_group, &schema_vec, schema.clone())
186+
.unwrap_or_else(|| generate_random_array(n_elem, n_distinct));
187+
188+
for expr in eq_group {
189+
let col = expr.as_any().downcast_ref::<Column>().unwrap();
190+
let (idx, _field) = schema.column_with_name(col.name()).unwrap();
191+
schema_vec[idx] = Some(representative_array.clone());
192+
}
193+
}
194+
195+
// Pair every column name with its array.
let res: Vec<_> = schema_vec
196+
.into_iter()
197+
.zip(schema.fields.iter())
198+
.map(|(elem, field)| {
199+
(
200+
field.name(),
201+
// Generate random values for columns that do not occur in any of the groups (equivalence, ordering equivalence, constants)
202+
elem.unwrap_or_else(|| generate_random_array(n_elem, n_distinct)),
203+
)
204+
})
205+
.collect();
206+
207+
// NOTE(review): the batch is built from (name, array) pairs, and all arrays
// here are UInt64, so the batch's field types presumably come from the
// arrays and differ from the fields of `eq_properties.schema()` -- confirm.
Ok(RecordBatch::try_from_iter(res)?)
208+
}
209+
210+
// This test checks whether a sort preserving merge preserves all of the valid
211+
// orderings. If the input has orderings [a ASC, b ASC], [c ASC, d ASC], then
212+
// after the sort preserving merge both [a ASC, b ASC] and [c ASC, d ASC] should still be valid.
//
// For each of N_DIFF_SCHEMA seeds it builds random equivalence properties,
// generates one batch satisfying them, feeds N_PARTITION identical copies of
// that batch into `streaming_merge`, and asserts that the merged output is
// still sorted with respect to every ordering in the ordering equivalence
// class.
213+
#[tokio::test]
214+
async fn stream_merge_multi_order_preserve() -> Result<()> {
215+
const N_PARTITION: usize = 8;
216+
const N_ELEM: usize = 25;
217+
const N_DISTINCT: usize = 5;
218+
const N_DIFF_SCHEMA: usize = 20;
219+
220+
// Function-local import; within this function it takes precedence over
// the `collect` imported at module level. It is applied to the merged
// stream below.
use datafusion::physical_plan::common::collect;
221+
for seed in 0..N_DIFF_SCHEMA {
222+
// Create a schema with random equivalence properties
223+
let (_test_schema, eq_properties) = create_random_schema(seed as u64)?;
224+
let table_data_with_properties =
225+
generate_table_for_eq_properties(&eq_properties, N_ELEM, N_DISTINCT)?;
226+
// The schema of the generated batch (not `_test_schema`) is used for the
// input streams and for the merge.
let schema = table_data_with_properties.schema();
227+
// Every input partition is a single-batch stream over a clone of the
// same generated batch.
let streams: Vec<SendableRecordBatchStream> = (0..N_PARTITION)
228+
.map(|_idx| {
229+
let batch = table_data_with_properties.clone();
230+
Box::pin(RecordBatchStreamAdapter::new(
231+
schema.clone(),
232+
futures::stream::once(async { Ok(batch) }),
233+
)) as SendableRecordBatchStream
234+
})
235+
.collect::<Vec<_>>();
236+
237+
// Sort expressions for the merge: the concatenation of all available
// orderings (empty if there are none).
238+
let exprs = eq_properties
239+
.oeq_class()
240+
.output_ordering()
241+
.unwrap_or_default();
242+
243+
let context = SessionContext::new().task_ctx();
244+
let mem_reservation =
245+
MemoryConsumer::new("test".to_string()).register(context.memory_pool());
246+
247+
// Internally SortPreservingMergeExec uses this function for merging.
// NOTE(review): the literal `1` and `None` below are presumably the batch
// size and the fetch limit -- confirm against `streaming_merge`'s signature.
248+
let res = streaming_merge(
249+
streams,
250+
schema,
251+
&exprs,
252+
BaselineMetrics::new(&ExecutionPlanMetricsSet::new(), 0),
253+
1,
254+
None,
255+
mem_reservation,
256+
)?;
257+
let res = collect(res).await?;
258+
// Contains the merged result.
259+
let res = concat_batches(&res[0].schema(), &res)?;
260+
261+
// For every ordering, evaluate its sort columns on the merged batch and
// lexsort them; if sorting changes nothing, the merged data already
// satisfies that ordering.
for ordering in eq_properties.oeq_class().iter() {
262+
let err_msg = format!("error in eq properties: {:?}", eq_properties);
263+
let sort_solumns = ordering
264+
.iter()
265+
.map(|sort_expr| sort_expr.evaluate_to_sort_column(&res))
266+
.collect::<Result<Vec<_>>>()?;
267+
let orig_columns = sort_solumns
268+
.iter()
269+
.map(|sort_column| sort_column.values.clone())
270+
.collect::<Vec<_>>();
271+
let sorted_columns = lexsort(&sort_solumns, None)?;
272+
273+
// Make sure after merging ordering is still valid.
274+
assert_eq!(orig_columns.len(), sorted_columns.len(), "{}", err_msg);
275+
assert!(
276+
izip!(orig_columns.into_iter(), sorted_columns.into_iter())
277+
.all(|(lhs, rhs)| { lhs == rhs }),
278+
"{}",
279+
err_msg
280+
)
281+
}
282+
}
283+
Ok(())
284+
}
285+
36286
#[tokio::test(flavor = "multi_thread", worker_threads = 8)]
37287
async fn sort_preserving_repartition_test() {
38288
let seed_start = 0;

datafusion/physical-expr/src/equivalence.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ impl EquivalenceGroup {
229229
}
230230

231231
/// Returns an iterator over the equivalence classes in this group.
232-
fn iter(&self) -> impl Iterator<Item = &EquivalenceClass> {
232+
pub fn iter(&self) -> impl Iterator<Item = &EquivalenceClass> {
233233
self.classes.iter()
234234
}
235235

@@ -551,7 +551,7 @@ impl EquivalenceGroup {
551551

552552
/// This function constructs a duplicate-free `LexOrderingReq` by filtering out
553553
/// duplicate entries that have same physical expression inside. For example,
554-
/// `vec![a Some(Asc), a Some(Desc)]` collapses to `vec![a Some(Asc)]`.
554+
/// `vec![a Some(ASC), a Some(DESC)]` collapses to `vec![a Some(ASC)]`.
555555
pub fn collapse_lex_req(input: LexRequirement) -> LexRequirement {
556556
let mut output = Vec::<PhysicalSortRequirement>::new();
557557
for item in input {
@@ -562,6 +562,19 @@ pub fn collapse_lex_req(input: LexRequirement) -> LexRequirement {
562562
output
563563
}
564564

565+
/// This function constructs a duplicate-free `LexOrdering` by filtering out
566+
/// entries whose physical expression already appeared earlier in `input`;
/// the first occurrence of each expression is kept and the relative order
/// of the kept entries is unchanged. Sort options are not compared. For example,
567+
/// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`.
568+
pub fn collapse_lex_ordering(input: LexOrdering) -> LexOrdering {
569+
let mut output = Vec::<PhysicalSortExpr>::new();
570+
for item in input {
571+
// Keep `item` only if no previously kept entry has an equal expression.
if !output.iter().any(|req| req.expr.eq(&item.expr)) {
572+
output.push(item);
573+
}
574+
}
575+
output
576+
}
577+
565578
/// An `OrderingEquivalenceClass` object keeps track of different alternative
566579
/// orderings that can describe a schema. For example, consider the following table:
567580
///
@@ -667,10 +680,13 @@ impl OrderingEquivalenceClass {
667680
}
668681
}
669682

670-
/// Gets the first ordering entry in this ordering equivalence class.
671-
/// This is one of the many valid orderings (if there are multiple).
683+
/// Returns the concatenation of all the orderings, with repeated
/// expressions collapsed to their first occurrence. This enables merge
684+
/// operations to preserve all equivalent orderings simultaneously.
/// Returns `None` when the concatenation is empty.
672685
pub fn output_ordering(&self) -> Option<LexOrdering> {
673-
self.orderings.first().cloned()
686+
let output_ordering =
687+
self.orderings.iter().flatten().cloned().collect::<Vec<_>>();
688+
let output_ordering = collapse_lex_ordering(output_ordering);
689+
(!output_ordering.is_empty()).then_some(output_ordering)
674690
}
675691

676692
// Append orderings in `other` to all existing orderings in this equivalence
@@ -825,6 +841,11 @@ impl EquivalenceProperties {
825841
&self.eq_group
826842
}
827843

844+
/// Returns a reference to the constant expressions stored in these
/// equivalence properties, as a slice.
845+
pub fn constants(&self) -> &[Arc<dyn PhysicalExpr>] {
846+
&self.constants
847+
}
848+
828849
/// Returns the normalized version of the ordering equivalence class within.
829850
/// Normalization removes constants and duplicates as well as standardizing
830851
/// expressions according to the equivalence group within.

datafusion/physical-plan/src/repartition/mod.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,6 @@ impl ExecutionPlan for RepartitionExec {
472472
if !self.maintains_input_order()[0] {
473473
result.clear_orderings();
474474
}
475-
if self.preserve_order {
476-
result = result.with_reorder(self.sort_exprs().unwrap_or_default().to_vec())
477-
}
478475
result
479476
}
480477

datafusion/physical-plan/src/sorts/sort_preserving_merge.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,7 @@ impl ExecutionPlan for SortPreservingMergeExec {
174174
}
175175

176176
fn equivalence_properties(&self) -> EquivalenceProperties {
177-
let output_oeq = self.input.equivalence_properties();
178-
output_oeq.with_reorder(self.expr.to_vec())
177+
self.input.equivalence_properties()
179178
}
180179

181180
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {

0 commit comments

Comments
 (0)