diff --git a/Cargo.lock b/Cargo.lock index 7c6109046..5059afaac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,9 +605,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.90" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90d59d9acd2a682b4e40605a242f6670eaa58c5957471cbf85e8aa6a0b97a5e8" +checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" dependencies = [ "cc", "cxxbridge-flags", @@ -617,9 +617,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.90" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebfa40bda659dd5c864e65f4c9a2b0aff19bea56b017b9b77c73d3766a453a38" +checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" dependencies = [ "cc", "codespan-reporting", @@ -632,15 +632,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.90" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "457ce6757c5c70dc6ecdbda6925b958aae7f959bda7d8fb9bde889e34a09dc03" +checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" [[package]] name = "cxxbridge-macro" -version = "1.0.90" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebf883b7aacd7b2aeb2a7b338648ee19f57c140d4ee8e52c68979c6b2f7f2263" +checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 4667835e2..b6cd5178a 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -36,6 +36,7 @@ ) from .common import ( + DFField, DFSchema, ) @@ -61,6 +62,7 @@ "TableScan", "Projection", "DFSchema", + "DFField", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 7bdbd8371..e5d958537 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -27,6 +27,7 @@ ) from datafusion.common import ( + DFField, DFSchema, ) @@ -57,7 +58,7 @@ def test_class_module_is_datafusion(): for klass in [Expr, Projection, TableScan]: assert klass.__module__ == "datafusion.expr" - for klass in [DFSchema]: + for klass in [DFField, DFSchema]: assert klass.__module__ == "datafusion.common" diff --git a/src/common.rs b/src/common.rs index ba4438efd..8a8e2adf5 100644 --- a/src/common.rs +++ b/src/common.rs @@ -24,5 +24,10 @@ pub mod df_schema; /// Initializes the `common` module to match the pattern of `datafusion-common` https://docs.rs/datafusion-common/18.0.0/datafusion_common/index.html pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 8ada1c756..e07805c52 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -31,7 +31,7 @@ use crate::errors::py_datafusion_err; /// to map those types and provide a simple place for developers /// to map types from one system to another. #[derive(Debug, Clone)] -#[pyclass(name = "DataTypeMap", module = "datafusion", subclass)] +#[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)] pub struct DataTypeMap { #[allow(dead_code)] arrow_type: PyDataType, @@ -419,7 +419,7 @@ impl DataTypeMap { /// Since `DataType` exists in another package we cannot make that happen here so we wrap /// `DataType` as `PyDataType` This exists solely to satisfy those constraints. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "DataType", module = "datafusion")] +#[pyclass(name = "DataType", module = "datafusion.common")] pub struct PyDataType { data_type: DataType, } @@ -438,7 +438,7 @@ impl From for PyDataType { /// Represents the possible Python types that can be mapped to the SQL types #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "PythonType", module = "datafusion")] +#[pyclass(name = "PythonType", module = "datafusion.common")] pub enum PythonType { Array, Bool, @@ -458,7 +458,7 @@ pub enum PythonType { #[allow(non_camel_case_types)] #[allow(clippy::upper_case_acronyms)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass(name = "SqlType", module = "datafusion")] +#[pyclass(name = "SqlType", module = "datafusion.common")] pub enum SqlType { ANY, ARRAY, diff --git a/src/common/df_field.rs b/src/common/df_field.rs index 098df9bda..fa65e0495 100644 --- a/src/common/df_field.rs +++ b/src/common/df_field.rs @@ -15,27 +15,92 @@ // specific language governing permissions and limitations // under the License. -use datafusion::arrow::datatypes::Field; +use datafusion::arrow::datatypes::DataType; +use datafusion_common::DFField; use pyo3::prelude::*; -use crate::common::data_type::DataTypeMap; +use super::data_type::PyDataType; /// PyDFField wraps an arrow-datafusion `DFField` struct type /// and also supplies convenience methods for interacting /// with the `DFField` instance in the context of Python -#[pyclass(name = "DFField", module = "datafusion", subclass)] +#[pyclass(name = "DFField", module = "datafusion.common", subclass)] #[derive(Debug, Clone)] pub struct PyDFField { - /// Optional qualifier (usually a table or relation name) - #[allow(dead_code)] - qualifier: Option, - #[allow(dead_code)] - name: String, - #[allow(dead_code)] - data_type: DataTypeMap, - /// Arrow field definition - #[allow(dead_code)] - field: Field, - #[allow(dead_code)] - index: usize, + field: DFField, +} + +impl From for DFField { + fn from(py_field: PyDFField) -> DFField { + py_field.field + } +} + +impl From for PyDFField { + fn from(field: DFField) -> PyDFField { + PyDFField { field } + } +} + +#[pymethods] +impl PyDFField { + #[new] + #[pyo3(signature = (qualifier=None, name="", data_type=DataType::Int64.into(), nullable=false))] + fn new(qualifier: Option<&str>, name: &str, data_type: PyDataType, nullable: bool) -> Self { + PyDFField { + field: DFField::new(qualifier, name, data_type.into(), nullable), + } + } + + // TODO: Need bindings for Array `Field` first + // #[staticmethod] + // #[pyo3(name = "from")] + // fn py_from(field: Field) -> Self {} + + // TODO: Need bindings for Array `Field` first + // #[staticmethod] + // #[pyo3(name = "from_qualified")] + // fn py_from_qualified(field: Field) -> Self {} + + #[pyo3(name = "name")] + fn py_name(&self) -> PyResult { + Ok(self.field.name().clone()) + } + + #[pyo3(name = "data_type")] + fn py_data_type(&self) -> PyResult { + Ok(self.field.data_type().clone().into()) + } + + #[pyo3(name = "is_nullable")] + fn py_is_nullable(&self) -> PyResult { + Ok(self.field.is_nullable()) + } + + #[pyo3(name = "qualified_name")] + fn py_qualified_name(&self) -> PyResult { + Ok(self.field.qualified_name()) + } + + // TODO: Need bindings for `Column` first + // #[pyo3(name = "qualified_column")] + // fn py_qualified_column(&self) -> PyResult {} + + // TODO: Need bindings for `Column` first + // #[pyo3(name = "unqualified_column")] + // fn py_unqualified_column(&self) -> PyResult {} + + #[pyo3(name = "qualifier")] + fn py_qualifier(&self) -> PyResult> { + Ok(self.field.qualifier()) + } + + // TODO: Need bindings for Arrow `Field` first + // #[pyo3(name = "field")] + // fn py_field(&self) -> PyResult {} + + #[pyo3(name = "strip_qualifier")] + fn py_strip_qualifier(&self) -> PyResult { + Ok(self.field.clone().strip_qualifier().into()) + } }