From 369a60431427b19524154682da2e23f964f4a871 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 23 Feb 2023 10:38:39 -0500 Subject: [PATCH 1/2] Add bindings for GetIndexedField --- Cargo.lock | 39 +++++++++--------- datafusion/__init__.py | 2 + datafusion/tests/test_imports.py | 2 + src/expr.rs | 1 + src/expr/empty_relation.rs | 4 +- src/expr/indexed_field.rs | 69 ++++++++++++++++++++++++++++++++ 6 files changed, 95 insertions(+), 22 deletions(-) create mode 100644 src/expr/indexed_field.rs diff --git a/Cargo.lock b/Cargo.lock index 04a2ea8d2..9e337e207 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -844,9 +844,9 @@ dependencies = [ "datafusion", "itertools", "object_store", - "prost 0.11.6", + "prost 0.11.7", "prost-build 0.9.0", - "prost-types 0.11.6", + "prost-types 0.11.7", "substrait", "tokio", ] @@ -1813,12 +1813,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "3933d3ac2717077b3d5f42b40f59edfb1fb6a8c14e1c7de0f38075c4bac8e314" dependencies = [ "bytes", - "prost-derive 0.11.6", + "prost-derive 0.11.7", ] [[package]] @@ -1843,9 +1843,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "a24be1d23b4552a012093e1b93697b73d644ae9590e3253d878d0e77d411b614" dependencies = [ "bytes", "heck 0.4.1", @@ -1854,8 +1854,8 @@ dependencies = [ "log", "multimap", "petgraph", - "prost 0.11.6", - "prost-types 0.11.6", + "prost 0.11.7", + "prost-types 0.11.7", "regex", "tempfile", "which", @@ -1876,9 +1876,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "8e9935362e8369bc3acd874caeeae814295c504c2bdbcde5c024089cf8b4dc12" dependencies = [ "anyhow", "itertools", @@ -1899,12 +1899,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = "7de56acd5cc9642cac2a9518d4c8c53818905398fe42d33235859e0d542a7695" dependencies = [ - "bytes", - "prost 0.11.6", + "prost 0.11.7", ] [[package]] @@ -2469,9 +2468,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2feb96a6a106e21161551af32dc4e0fdab3aceb926b940d7e92a086b640fc7c" dependencies = [ "heck 0.4.1", - "prost 0.11.6", - "prost-build 0.11.6", - "prost-types 0.11.6", + "prost 0.11.7", + "prost-build 0.11.7", + "prost-types 0.11.7", "schemars", "serde", "serde_json", @@ -2488,9 +2487,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "d56e159d99e6c2b93995d171050271edb50ecc5288fbc7cc17de8fdce4e58c14" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/__init__.py b/datafusion/__init__.py index 01680e731..0bbbe5f12 100644 --- a/datafusion/__init__.py +++ b/datafusion/__init__.py @@ -53,6 +53,7 @@ ScalarVariable, Sort, TableScan, + GetIndexedField, ) __version__ = importlib_metadata.version(__name__) @@ -81,6 +82,7 @@ "SimilarTo", "ScalarVariable", "Alias", + "GetIndexedField", ] diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py index 0e39fd597..5fc69814c 100644 --- a/datafusion/tests/test_imports.py +++ b/datafusion/tests/test_imports.py @@ -49,6 +49,7 @@ SimilarTo, ScalarVariable, Alias, + 
GetIndexedField, ) @@ -87,6 +88,7 @@ def test_class_module_is_datafusion(): SimilarTo, ScalarVariable, Alias, + GetIndexedField, ]: assert klass.__module__ == "datafusion.expr" diff --git a/src/expr.rs b/src/expr.rs index cdc81e7a4..52b31a71c 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -41,6 +41,7 @@ pub mod binary_expr; pub mod column; pub mod empty_relation; pub mod filter; +pub mod indexed_field; pub mod like; pub mod limit; pub mod literal; diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index f3008d1f7..8b2621da5 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -43,8 +43,8 @@ impl Display for PyEmptyRelation { write!( f, "Empty Relation - \nProduce One Row: {:?} - \nSchema: {:?}", + Produce One Row: {:?} + Schema: {:?}", &self.empty.produce_one_row, &self.empty.schema ) } diff --git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs new file mode 100644 index 000000000..c98607712 --- /dev/null +++ b/src/expr/indexed_field.rs @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::expr::PyExpr; +use datafusion_expr::expr::GetIndexedField; +use pyo3::prelude::*; +use std::fmt::{Display, Formatter}; + +use super::literal::PyLiteral; + +#[pyclass(name = "GetIndexedField", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyGetIndexedField { + indexed_field: GetIndexedField, +} + +impl From<PyGetIndexedField> for GetIndexedField { + fn from(indexed_field: PyGetIndexedField) -> Self { + indexed_field.indexed_field + } +} + +impl From<GetIndexedField> for PyGetIndexedField { + fn from(indexed_field: GetIndexedField) -> PyGetIndexedField { + PyGetIndexedField { indexed_field } + } +} + +impl Display for PyGetIndexedField { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!( + f, + "GetIndexedField + Expr: {:?} + Key: {:?}", + &self.indexed_field.expr, &self.indexed_field.key + ) + } +} + +#[pymethods] +impl PyGetIndexedField { + fn expr(&self) -> PyResult<PyExpr> { + Ok((*self.indexed_field.expr).clone().into()) + } + + fn key(&self) -> PyResult<PyLiteral> { + Ok(self.indexed_field.key.clone().into()) + } + + /// Get a String representation of this GetIndexedField expression + fn __repr__(&self) -> String { + format!("{}", self) + } +} From 8b37339b5debfead41585d3df9cb33760becb30f Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 23 Feb 2023 11:29:41 -0500 Subject: [PATCH 2/2] Add pre-commit configuration file --- .pre-commit-config.yaml | 52 +++++++++++++++++++++++++++++++++++++++++ README.md | 8 +++++++ 2 files changed, 60 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..3c6805322 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +repos: + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + files: datafusion/.* + # Explicitly specify the pyproject.toml at the repo root, not per-project. + args: ["--config", "pyproject.toml", "--line-length", "79", "--diff", "--check", "."] + - repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + files: datafusion/.*$ + types: [file] + types_or: [python] + additional_dependencies: ["flake8-force"] + - repo: local + hooks: + - id: rust-fmt + name: Rust fmt + description: Run cargo fmt on files included in the commit. rustfmt should be installed before-hand. + entry: cargo fmt --all -- + pass_filenames: true + types: [file, rust] + language: system + - id: rust-clippy + name: Rust clippy + description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. 
+ entry: cargo clippy --all-targets --all-features -- -Dclippy::all -Aclippy::redundant_closure + pass_filenames: false + types: [file, rust] + language: system + +default_language_version: + python: python3 diff --git a/README.md b/README.md index d83b78ce3..923b6be0c 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,14 @@ maturin develop python -m pytest ``` +### Running & Installing pre-commit hooks + +arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting and to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise. + +Our pre-commit hooks can be installed by running `pre-commit install`, which installs the hooks into your ARROW_DATAFUSION_PYTHON_ROOT/.git/hooks directory. They then run each time you perform a commit, and the commit fails if an offending lint is found, giving you the opportunity to make changes locally before pushing. + +The pre-commit hooks can also be run ad-hoc without installing them by simply running `pre-commit run --all-files`. + ## How to update dependencies To change test dependencies, change the `requirements.in` and run