From 0fdf75c9677671962986ab659bc717e2882b181c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 10 May 2023 12:28:47 -0400 Subject: [PATCH 1/8] Make to_variant public --- src/sql/logical.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 07a3f65b1..a48447bbb 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -56,7 +56,7 @@ impl PyLogicalPlan { #[pymethods] impl PyLogicalPlan { /// Return the specific logical operator - fn to_variant(&self, py: Python) -> PyResult { + pub fn to_variant(&self, py: Python) -> PyResult { Python::with_gil(|_| match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), From d80c54c66aa39bd04c1aca1d8069031cf76286e8 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 10 May 2023 13:04:09 -0400 Subject: [PATCH 2/8] Add to_variant() coverage for Subquery and SubqueryAlias --- src/expr/subquery.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/sql/logical.rs | 4 ++++ 2 files changed, 48 insertions(+) diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index 93ff244f6..1d526c2c6 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -15,9 +15,15 @@ // specific language governing permissions and limitations // under the License. +use std::fmt::{Display, Formatter, self}; + use datafusion_expr::Subquery; use pyo3::prelude::*; +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + #[pyclass(name = "Subquery", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PySubquery { @@ -35,3 +41,41 @@ impl From for PySubquery { PySubquery { subquery } } } + +impl Display for PySubquery { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Subquery + Subquery: {:?} + outer_ref_columns: {:?}", + self.subquery.subquery, self.subquery.outer_ref_columns, + ) + } +} + +#[pymethods] +impl PySubquery { + /// Retrieves the input `LogicalPlan` to this `Projection` node + fn input(&self) -> PyResult> { + Ok(Self::inputs(self)) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("Subquery({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("Subquery".to_string()) + } +} + +impl LogicalNode for PySubquery { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant(&self, py: Python) -> PyResult { + Ok(self.clone().into_py(py)) + } +} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index a48447bbb..2183155bf 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -28,6 +28,8 @@ use crate::expr::filter::PyFilter; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; use crate::expr::sort::PySort; +use crate::expr::subquery::PySubquery; +use crate::expr::subquery_alias::PySubqueryAlias; use crate::expr::table_scan::PyTableScan; use datafusion_expr::LogicalPlan; use pyo3::prelude::*; @@ -69,6 +71,8 @@ impl PyLogicalPlan { LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), LogicalPlan::Sort(plan) => PySort::from(plan.clone()).to_variant(py), LogicalPlan::TableScan(plan) => PyTableScan::from(plan.clone()).to_variant(py), + LogicalPlan::Subquery(plan) => PySubquery::from(plan.clone()).to_variant(py), + LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), other => Err(py_unsupported_variant_err(format!( "Cannot convert this plan to a LogicalNode: {:?}", other From c511238469b7578686af57140f7aff4051dd6e02 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 10 May 2023 18:40:00 -0400 Subject: [PATCH 3/8] Add variant_name() function to Expr --- src/expr.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/expr.rs b/src/expr.rs index c002b3291..fa44e40c3 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -149,6 +149,12 @@ impl PyExpr { Ok(self.expr.canonical_name()) } + /// Returns the name of the Expr variant. + /// Ex: 'IsNotNull', 'Literal', 'BinaryExpr', etc + fn variant_name(&self) -> PyResult<&str> { + Ok(self.expr.variant_name()) + } + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { let expr = match op { CompareOp::Lt => self.expr.clone().lt(other.expr), From 727a510d268d539d9a1c8fef2a3613c45a77f212 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 May 2023 09:44:33 -0400 Subject: [PATCH 4/8] Add function for friendly display of arrow DataType to Python layer since DataType enum values cannot be directly accessed --- src/common/data_type.rs | 44 +++++++++++++++++++++++++++++++++++++++++ src/expr.rs | 8 ++++---- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 622e1aa46..3f372613c 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -292,6 +292,7 @@ impl DataTypeMap { } } + #[pymethods] impl DataTypeMap { #[new] @@ -503,6 +504,49 @@ impl DataTypeMap { )), } } + + /// Unfortunately PyO3 does not allow for us to expose the DataType as an enum since + /// we cannot directly annotae the Enum instance of dependency code. Therefore, here + /// we provide an enum to mimic it. + pub fn friendly_arrow_type_name(&self) -> PyResult<&str> { + Ok(match &self.arrow_type.data_type { + DataType::Null => "Null", + DataType::Boolean => "Boolean", + DataType::Int8 => "Int8", + DataType::Int16 => "Int16", + DataType::Int32 => "Int32", + DataType::Int64 => "Int64", + DataType::UInt8 => "UInt8", + DataType::UInt16 => "UInt16", + DataType::UInt32 => "UInt32", + DataType::UInt64 => "UInt64", + DataType::Float16 => "Float16", + DataType::Float32 => "Float32", + DataType::Float64 => "Float64", + DataType::Timestamp(_, _) => "Timestamp", + DataType::Date32 => "Date32", + DataType::Date64 => "Date64", + DataType::Time32(_) => "Time32", + DataType::Time64(_) => "Time64", + DataType::Duration(_) => "Duration", + DataType::Interval(_) => "Interval", + DataType::Binary => "Binary", + DataType::FixedSizeBinary(_) => "FixedSizeBinary", + DataType::LargeBinary => "LargeBinary", + DataType::Utf8 => "Utf8", + DataType::LargeUtf8 => "LargeUtf8", + DataType::List(_) => "List", + DataType::FixedSizeList(_, _) => "FixedSizeList", + DataType::LargeList(_) => "LargeList", + DataType::Struct(_) => "Struct", + DataType::Union(_, _) => "Union", + DataType::Dictionary(_, _) => "Dictionary", + DataType::Decimal128(_, _) => "Decimal128", + DataType::Decimal256(_, _) => "Decimal256", + DataType::Map(_, _) => "Map", + DataType::RunEndEncoded(_, _) => "RunEndEncoded" + }) + } } /// PyO3 requires that objects passed between Rust and Python implement the trait `PyClass` diff --git a/src/expr.rs b/src/expr.rs index fa44e40c3..bf968bc5d 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -323,10 +323,10 @@ impl PyExpr { ScalarValue::Time32Millisecond(v) => v.into_py(py), ScalarValue::Time64Microsecond(v) => v.into_py(py), ScalarValue::Time64Nanosecond(v) => v.into_py(py), - ScalarValue::TimestampSecond(_, _) => todo!(), - ScalarValue::TimestampMillisecond(_, _) => todo!(), - ScalarValue::TimestampMicrosecond(_, _) => todo!(), - ScalarValue::TimestampNanosecond(_, _) => todo!(), + ScalarValue::TimestampSecond(v, _) => v.into_py(py), + ScalarValue::TimestampMillisecond(v, _) => v.into_py(py), + ScalarValue::TimestampMicrosecond(v, _) => v.into_py(py), + ScalarValue::TimestampNanosecond(v, _) => v.into_py(py), ScalarValue::IntervalYearMonth(v) => v.into_py(py), ScalarValue::IntervalDayTime(v) => v.into_py(py), ScalarValue::IntervalMonthDayNano(v) => v.into_py(py), From 2efa639c51bd766a21313e916eaa2ed83fc681d5 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 May 2023 09:57:00 -0400 Subject: [PATCH 5/8] Change signature --- src/common/data_type.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 3f372613c..8b053f0ae 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -508,6 +508,7 @@ impl DataTypeMap { /// Unfortunately PyO3 does not allow for us to expose the DataType as an enum since /// we cannot directly annotae the Enum instance of dependency code. Therefore, here /// we provide an enum to mimic it. + #[pyo3(name = "friendly_arrow_type_name")] pub fn friendly_arrow_type_name(&self) -> PyResult<&str> { Ok(match &self.arrow_type.data_type { DataType::Null => "Null", From 0f5ecc528fd15a21fa4b040046ff2a05009b52a8 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 May 2023 13:41:35 -0400 Subject: [PATCH 6/8] Updated to include Decimal128 variant coverage --- src/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expr.rs b/src/expr.rs index bf968bc5d..4bf275939 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -302,7 +302,7 @@ impl PyExpr { ScalarValue::Boolean(v) => v.into_py(py), ScalarValue::Float32(v) => v.into_py(py), ScalarValue::Float64(v) => v.into_py(py), - ScalarValue::Decimal128(_, _, _) => todo!(), + ScalarValue::Decimal128(v, _, _) => v.into_py(py), ScalarValue::Int8(v) => v.into_py(py), ScalarValue::Int16(v) => v.into_py(py), ScalarValue::Int32(v) => v.into_py(py), From de42813a2d8689ac73ada6605b12636efc8b281b Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 May 2023 14:55:32 -0400 Subject: [PATCH 7/8] Add try_from to PyCreateView --- src/expr/create_view.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index 9d06239ea..0f16334f7 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -17,10 +17,10 @@ use std::fmt::{self, Display, Formatter}; -use datafusion_expr::CreateView; +use datafusion_expr::{CreateView, DdlStatement, LogicalPlan}; use pyo3::prelude::*; -use crate::sql::logical::PyLogicalPlan; +use crate::{sql::logical::PyLogicalPlan, errors::py_type_err}; use super::logical_node::LogicalNode; @@ -92,3 +92,14 @@ impl LogicalNode for PyCreateView { Ok(self.clone().into_py(py)) } } + +impl TryFrom for PyCreateView { + type Error = PyErr; + + fn try_from(logical_plan: LogicalPlan) -> Result { + match logical_plan { + LogicalPlan::Ddl(DdlStatement::CreateView(create)) => Ok(PyCreateView { create }), + _ => Err(py_type_err("unexpected plan")), + } + } +} From ce83d8a837ac2bd8473cbe3432cebd3eb90a6e1c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 14 Jun 2023 10:19:16 -0400 Subject: [PATCH 8/8] Cargo lint fixes --- src/common/data_type.rs | 3 +-- src/expr/create_view.rs | 2 +- src/expr/subquery.rs | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 8b053f0ae..85d2febb5 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -292,7 +292,6 @@ impl DataTypeMap { } } - #[pymethods] impl DataTypeMap { #[new] @@ -545,7 +544,7 @@ impl DataTypeMap { DataType::Decimal128(_, _) => "Decimal128", DataType::Decimal256(_, _) => "Decimal256", DataType::Map(_, _) => "Map", - DataType::RunEndEncoded(_, _) => "RunEndEncoded" + DataType::RunEndEncoded(_, _) => "RunEndEncoded", }) } } diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index 0f16334f7..febd723c5 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -20,7 +20,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion_expr::{CreateView, DdlStatement, LogicalPlan}; use pyo3::prelude::*; -use crate::{sql::logical::PyLogicalPlan, errors::py_type_err}; +use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; use super::logical_node::LogicalNode; diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index 1d526c2c6..f6f7b7fe5 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::fmt::{Display, Formatter, self}; +use std::fmt::{self, Display, Formatter}; use datafusion_expr::Subquery; use pyo3::prelude::*;