Skip to content

Commit 19bdcdc

Browse files
authored
Refactor optimize projections rule, combines (eliminate, merge, pushdown projections) (#8340)
1 parent e21b031 commit 19bdcdc

16 files changed

Lines changed: 1011 additions & 819 deletions

File tree

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ async fn csv_explain_verbose_plans() {
560560
// Since the plan contains paths that are environmentally
561561
// dependent (e.g. full path of the test file), only verify
562562
// important content
563-
assert_contains!(&actual, "logical_plan after push_down_projection");
563+
assert_contains!(&actual, "logical_plan after optimize_projections");
564564
assert_contains!(&actual, "physical_plan");
565565
assert_contains!(&actual, "FilterExec: c2@1 > 10");
566566
assert_contains!(actual, "ProjectionExec: expr=[c1@0 as c1]");

datafusion/expr/src/logical_plan/mod.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,11 @@ pub use ddl::{
3333
};
3434
pub use dml::{DmlStatement, WriteOp};
3535
pub use plan::{
36-
Aggregate, Analyze, CrossJoin, DescribeTable, Distinct, DistinctOn, EmptyRelation,
37-
Explain, Extension, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan,
38-
Partitioning, PlanType, Prepare, Projection, Repartition, Sort, StringifiedPlan,
39-
Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window,
36+
projection_schema, Aggregate, Analyze, CrossJoin, DescribeTable, Distinct,
37+
DistinctOn, EmptyRelation, Explain, Extension, Filter, Join, JoinConstraint,
38+
JoinType, Limit, LogicalPlan, Partitioning, PlanType, Prepare, Projection,
39+
Repartition, Sort, StringifiedPlan, Subquery, SubqueryAlias, TableScan,
40+
ToStringifiedPlan, Union, Unnest, Values, Window,
4041
};
4142
pub use statement::{
4243
SetVariable, Statement, TransactionAccessMode, TransactionConclusion, TransactionEnd,

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -551,15 +551,9 @@ impl LogicalPlan {
551551
Projection::try_new(projection.expr.to_vec(), Arc::new(inputs[0].clone()))
552552
.map(LogicalPlan::Projection)
553553
}
554-
LogicalPlan::Window(Window {
555-
window_expr,
556-
schema,
557-
..
558-
}) => Ok(LogicalPlan::Window(Window {
559-
input: Arc::new(inputs[0].clone()),
560-
window_expr: window_expr.to_vec(),
561-
schema: schema.clone(),
562-
})),
554+
LogicalPlan::Window(Window { window_expr, .. }) => Ok(LogicalPlan::Window(
555+
Window::try_new(window_expr.to_vec(), Arc::new(inputs[0].clone()))?,
556+
)),
563557
LogicalPlan::Aggregate(Aggregate {
564558
group_expr,
565559
aggr_expr,
@@ -837,10 +831,19 @@ impl LogicalPlan {
837831
LogicalPlan::Extension(e) => Ok(LogicalPlan::Extension(Extension {
838832
node: e.node.from_template(&expr, inputs),
839833
})),
840-
LogicalPlan::Union(Union { schema, .. }) => Ok(LogicalPlan::Union(Union {
841-
inputs: inputs.iter().cloned().map(Arc::new).collect(),
842-
schema: schema.clone(),
843-
})),
834+
LogicalPlan::Union(Union { schema, .. }) => {
835+
let input_schema = inputs[0].schema();
836+
// If inputs are not pruned do not change schema.
837+
let schema = if schema.fields().len() == input_schema.fields().len() {
838+
schema
839+
} else {
840+
input_schema
841+
};
842+
Ok(LogicalPlan::Union(Union {
843+
inputs: inputs.iter().cloned().map(Arc::new).collect(),
844+
schema: schema.clone(),
845+
}))
846+
}
844847
LogicalPlan::Distinct(distinct) => {
845848
let distinct = match distinct {
846849
Distinct::All(_) => Distinct::All(Arc::new(inputs[0].clone())),
@@ -1792,11 +1795,8 @@ pub struct Projection {
17921795
impl Projection {
17931796
/// Create a new Projection
17941797
pub fn try_new(expr: Vec<Expr>, input: Arc<LogicalPlan>) -> Result<Self> {
1795-
let schema = Arc::new(DFSchema::new_with_metadata(
1796-
exprlist_to_fields(&expr, &input)?,
1797-
input.schema().metadata().clone(),
1798-
)?);
1799-
Self::try_new_with_schema(expr, input, schema)
1798+
let projection_schema = projection_schema(&input, &expr)?;
1799+
Self::try_new_with_schema(expr, input, projection_schema)
18001800
}
18011801

18021802
/// Create a new Projection using the specified output schema
@@ -1808,11 +1808,6 @@ impl Projection {
18081808
if expr.len() != schema.fields().len() {
18091809
return plan_err!("Projection has mismatch between number of expressions ({}) and number of fields in schema ({})", expr.len(), schema.fields().len());
18101810
}
1811-
// Update functional dependencies of `input` according to projection
1812-
// expressions:
1813-
let id_key_groups = calc_func_dependencies_for_project(&expr, &input)?;
1814-
let schema = schema.as_ref().clone();
1815-
let schema = Arc::new(schema.with_functional_dependencies(id_key_groups));
18161811
Ok(Self {
18171812
expr,
18181813
input,
@@ -1836,6 +1831,29 @@ impl Projection {
18361831
}
18371832
}
18381833

1834+
/// Computes the schema of the result produced by applying a projection to the input logical plan.
1835+
///
1836+
/// # Arguments
1837+
///
1838+
/// * `input`: A reference to the input `LogicalPlan` for which the projection schema
1839+
/// will be computed.
1840+
/// * `exprs`: A slice of `Expr` expressions representing the projection operation to apply.
1841+
///
1842+
/// # Returns
1843+
///
1844+
/// A `Result` containing an `Arc<DFSchema>` representing the schema of the result
1845+
/// produced by the projection operation. If the schema computation is successful,
1846+
/// the `Result` will contain the schema; otherwise, it will contain an error.
1847+
pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSchema>> {
1848+
let mut schema = DFSchema::new_with_metadata(
1849+
exprlist_to_fields(exprs, input)?,
1850+
input.schema().metadata().clone(),
1851+
)?;
1852+
schema = schema
1853+
.with_functional_dependencies(calc_func_dependencies_for_project(exprs, input)?);
1854+
Ok(Arc::new(schema))
1855+
}
1856+
18391857
/// Aliased subquery
18401858
#[derive(Clone, PartialEq, Eq, Hash)]
18411859
// mark non_exhaustive to encourage use of try_new/new()
@@ -1934,8 +1952,7 @@ impl Window {
19341952
/// Create a new window operator.
19351953
pub fn try_new(window_expr: Vec<Expr>, input: Arc<LogicalPlan>) -> Result<Self> {
19361954
let mut window_fields: Vec<DFField> = input.schema().fields().clone();
1937-
window_fields
1938-
.extend_from_slice(&exprlist_to_fields(window_expr.iter(), input.as_ref())?);
1955+
window_fields.extend_from_slice(&exprlist_to_fields(window_expr.iter(), &input)?);
19391956
let metadata = input.schema().metadata().clone();
19401957

19411958
// Update functional dependencies for window:
@@ -2357,6 +2374,13 @@ impl Aggregate {
23572374
schema,
23582375
})
23592376
}
2377+
2378+
/// Get the length of the group by expression in the output schema
2379+
/// This is not simply group by expression length. Expression may be
2380+
/// GroupingSet, etc. In these cases we need to get inner expression lengths.
2381+
pub fn group_expr_len(&self) -> Result<usize> {
2382+
grouping_set_expr_count(&self.group_expr)
2383+
}
23602384
}
23612385

23622386
/// Checks whether any expression in `group_expr` contains `Expr::GroupingSet`.

datafusion/optimizer/src/eliminate_project.rs

Lines changed: 0 additions & 94 deletions
This file was deleted.

datafusion/optimizer/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ pub mod eliminate_limit;
2727
pub mod eliminate_nested_union;
2828
pub mod eliminate_one_union;
2929
pub mod eliminate_outer_join;
30-
pub mod eliminate_project;
3130
pub mod extract_equijoin_predicate;
3231
pub mod filter_null_join_keys;
3332
pub mod merge_projection;
33+
pub mod optimize_projections;
3434
pub mod optimizer;
3535
pub mod propagate_empty_relation;
3636
pub mod push_down_filter;

datafusion/optimizer/src/merge_projection.rs

Lines changed: 5 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -15,105 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::collections::HashMap;
19-
20-
use crate::optimizer::ApplyOrder;
21-
use crate::push_down_filter::replace_cols_by_name;
22-
use crate::{OptimizerConfig, OptimizerRule};
23-
24-
use datafusion_common::Result;
25-
use datafusion_expr::{Expr, LogicalPlan, Projection};
26-
27-
/// Optimization rule that merge [LogicalPlan::Projection].
28-
#[derive(Default)]
29-
pub struct MergeProjection;
30-
31-
impl MergeProjection {
32-
#[allow(missing_docs)]
33-
pub fn new() -> Self {
34-
Self {}
35-
}
36-
}
37-
38-
impl OptimizerRule for MergeProjection {
39-
fn try_optimize(
40-
&self,
41-
plan: &LogicalPlan,
42-
_config: &dyn OptimizerConfig,
43-
) -> Result<Option<LogicalPlan>> {
44-
match plan {
45-
LogicalPlan::Projection(parent_projection) => {
46-
match parent_projection.input.as_ref() {
47-
LogicalPlan::Projection(child_projection) => {
48-
let new_plan =
49-
merge_projection(parent_projection, child_projection)?;
50-
Ok(Some(
51-
self.try_optimize(&new_plan, _config)?.unwrap_or(new_plan),
52-
))
53-
}
54-
_ => Ok(None),
55-
}
56-
}
57-
_ => Ok(None),
58-
}
59-
}
60-
61-
fn name(&self) -> &str {
62-
"merge_projection"
63-
}
64-
65-
fn apply_order(&self) -> Option<ApplyOrder> {
66-
Some(ApplyOrder::TopDown)
67-
}
68-
}
69-
70-
pub(super) fn merge_projection(
71-
parent_projection: &Projection,
72-
child_projection: &Projection,
73-
) -> Result<LogicalPlan> {
74-
let replace_map = collect_projection_expr(child_projection);
75-
let new_exprs = parent_projection
76-
.expr
77-
.iter()
78-
.map(|expr| replace_cols_by_name(expr.clone(), &replace_map))
79-
.enumerate()
80-
.map(|(i, e)| match e {
81-
Ok(e) => {
82-
let parent_expr = parent_projection.schema.fields()[i].qualified_name();
83-
e.alias_if_changed(parent_expr)
84-
}
85-
Err(e) => Err(e),
86-
})
87-
.collect::<Result<Vec<_>>>()?;
88-
// Use try_new, since schema changes with changing expressions.
89-
let new_plan = LogicalPlan::Projection(Projection::try_new(
90-
new_exprs,
91-
child_projection.input.clone(),
92-
)?);
93-
Ok(new_plan)
94-
}
95-
96-
pub fn collect_projection_expr(projection: &Projection) -> HashMap<String, Expr> {
97-
projection
98-
.schema
99-
.fields()
100-
.iter()
101-
.enumerate()
102-
.flat_map(|(i, field)| {
103-
// strip alias
104-
let expr = projection.expr[i].clone().unalias();
105-
// Convert both qualified and unqualified fields
106-
[
107-
(field.name().clone(), expr.clone()),
108-
(field.qualified_name(), expr),
109-
]
110-
})
111-
.collect::<HashMap<_, _>>()
112-
}
113-
11418
#[cfg(test)]
11519
mod tests {
116-
use crate::merge_projection::MergeProjection;
20+
use crate::optimize_projections::OptimizeProjections;
11721
use datafusion_common::Result;
11822
use datafusion_expr::{
11923
binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, LogicalPlan,
@@ -124,7 +28,7 @@ mod tests {
12428
use crate::test::*;
12529

12630
fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> {
127-
assert_optimized_plan_eq(Arc::new(MergeProjection::new()), plan, expected)
31+
assert_optimized_plan_eq(Arc::new(OptimizeProjections::new()), plan, expected)
12832
}
12933

13034
#[test]
@@ -136,7 +40,7 @@ mod tests {
13640
.build()?;
13741

13842
let expected = "Projection: Int32(1) + test.a\
139-
\n TableScan: test";
43+
\n TableScan: test projection=[a]";
14044
assert_optimized_plan_equal(&plan, expected)
14145
}
14246

@@ -150,7 +54,7 @@ mod tests {
15054
.build()?;
15155

15256
let expected = "Projection: Int32(1) + test.a\
153-
\n TableScan: test";
57+
\n TableScan: test projection=[a]";
15458
assert_optimized_plan_equal(&plan, expected)
15559
}
15660

@@ -163,7 +67,7 @@ mod tests {
16367
.build()?;
16468

16569
let expected = "Projection: test.a AS alias\
166-
\n TableScan: test";
70+
\n TableScan: test projection=[a]";
16771
assert_optimized_plan_equal(&plan, expected)
16872
}
16973
}

0 commit comments

Comments
 (0)