Skip to content

Commit 1e1de82

Browse files
authored
fix subquery where exists distinct (#3732)
* fix subquery where exists distinct * optimize * fmt
1 parent de9c7c5 commit 1e1de82

File tree

2 files changed

+34
-8
lines changed

2 files changed

+34
-8
lines changed

datafusion/optimizer/src/decorrelate_where_exists.rs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,10 @@
1616
// under the License.
1717

1818
use crate::utils::{
19-
exprs_to_join_cols, find_join_exprs, only_or_err, split_conjunction,
20-
verify_not_disjunction,
19+
exprs_to_join_cols, find_join_exprs, split_conjunction, verify_not_disjunction,
2120
};
2221
use crate::{utils, OptimizerConfig, OptimizerRule};
23-
use datafusion_common::{context, plan_err};
22+
use datafusion_common::{context, plan_err, DataFusionError};
2423
use datafusion_expr::logical_plan::{Filter, JoinType, Subquery};
2524
use datafusion_expr::{combine_filters, Expr, LogicalPlan, LogicalPlanBuilder};
2625
use std::sync::Arc;
@@ -134,11 +133,23 @@ fn optimize_exists(
134133
outer_input: &LogicalPlan,
135134
outer_other_exprs: &[Expr],
136135
) -> datafusion_common::Result<LogicalPlan> {
137-
let subqry_inputs = query_info.query.subquery.inputs();
138-
let subqry_input = only_or_err(subqry_inputs.as_slice())
139-
.map_err(|e| context!("single expression projection required", e))?;
140-
let subqry_filter = Filter::try_from_plan(subqry_input)
141-
.map_err(|e| context!("cannot optimize non-correlated subquery", e))?;
136+
let subqry_filter = match query_info.query.subquery.as_ref() {
137+
LogicalPlan::Distinct(subqry_distinct) => match subqry_distinct.input.as_ref() {
138+
LogicalPlan::Projection(subqry_proj) => {
139+
Filter::try_from_plan(&*subqry_proj.input)
140+
}
141+
_ => Err(DataFusionError::NotImplemented(
142+
"Subquery currently only supports distinct or projection".to_string(),
143+
)),
144+
},
145+
LogicalPlan::Projection(subqry_proj) => {
146+
Filter::try_from_plan(&*subqry_proj.input)
147+
}
148+
_ => Err(DataFusionError::NotImplemented(
149+
"Subquery currently only supports distinct or projection".to_string(),
150+
)),
151+
}
152+
.map_err(|e| context!("cannot optimize non-correlated subquery", e))?;
142153

143154
// split into filters
144155
let mut subqry_filter_exprs = vec![];

datafusion/optimizer/tests/integration-test.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,21 @@ fn anti_join_with_join_filter() -> Result<()> {
118118
Ok(())
119119
}
120120

121+
#[test]
122+
fn where_exists_distinct() -> Result<()> {
123+
// regression test for https://github.com/apache/arrow-datafusion/issues/3724
124+
let sql = "SELECT * FROM test WHERE EXISTS (\
125+
SELECT DISTINCT col_int32 FROM test t2 WHERE test.col_int32 = t2.col_int32)";
126+
let plan = test_sql(sql)?;
127+
let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
128+
Semi Join: test.col_int32 = t2.col_int32
129+
TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
130+
SubqueryAlias: t2
131+
TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
132+
assert_eq!(expected, format!("{:?}", plan));
133+
Ok(())
134+
}
135+
121136
#[test]
122137
fn intersect() -> Result<()> {
123138
let sql = "SELECT col_int32, col_utf8 FROM test \

0 commit comments

Comments
 (0)