Skip to content

Commit 292865e

Browse files
authored
Create datafusion-functions-array crate and move ArrayToString function into it (#9113)
* Add `datafusion-functions-array` crate * Add test for round tripping array_to_string
1 parent 3c2b542 commit 292865e

25 files changed

Lines changed: 652 additions & 293 deletions

File tree

.github/workflows/rust.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ jobs:
8282
- name: Check function packages (encoding_expressions)
8383
run: cargo check --no-default-features --features=encoding_expressions -p datafusion
8484

85+
- name: Check function packages (array_expressions)
86+
run: cargo check --no-default-features --features=array_expressions -p datafusion
87+
8588
- name: Check Cargo.lock for datafusion-cli
8689
run: |
8790
# If this test fails, try running `cargo update` in the `datafusion-cli` directory

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
[workspace]
1919
exclude = ["datafusion-cli"]
20-
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
20+
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/functions-array", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
2121
]
2222
resolver = "2"
2323

@@ -51,6 +51,7 @@ datafusion-common = { path = "datafusion/common", version = "35.0.0" }
5151
datafusion-execution = { path = "datafusion/execution", version = "35.0.0" }
5252
datafusion-expr = { path = "datafusion/expr", version = "35.0.0" }
5353
datafusion-functions = { path = "datafusion/functions", version = "35.0.0" }
54+
datafusion-functions-array = { path = "datafusion/functions-array", version = "35.0.0" }
5455
datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" }
5556
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "35.0.0" }
5657
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "35.0.0" }

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.
7575

7676
Default features:
7777

78+
- `array_expressions`: functions for working with arrays such as `array_to_string`
7879
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
7980
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
8081
- `encoding_expressions`: `encode` and `decode` functions

datafusion-cli/Cargo.lock

Lines changed: 40 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ path = "src/lib.rs"
3838

3939
[features]
4040
# Used to enable the avro format
41+
array_expressions = ["datafusion-functions-array"]
4142
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
4243
backtrace = ["datafusion-common/backtrace"]
4344
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
4445
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
45-
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
46+
default = ["array_expressions", "crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
4647
encoding_expressions = ["datafusion-functions/encoding_expressions"]
4748
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
4849
force_hash_collisions = []
@@ -68,7 +69,8 @@ dashmap = { workspace = true }
6869
datafusion-common = { path = "../common", version = "35.0.0", features = ["object_store"], default-features = false }
6970
datafusion-execution = { workspace = true }
7071
datafusion-expr = { workspace = true }
71-
datafusion-functions = { path = "../functions", version = "35.0.0" }
72+
datafusion-functions = { workspace = true }
73+
datafusion-functions-array = { workspace = true, optional = true }
7274
datafusion-optimizer = { path = "../optimizer", version = "35.0.0", default-features = false }
7375
datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false }
7476
datafusion-physical-plan = { workspace = true }

datafusion/core/src/execution/context/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,11 @@ impl SessionState {
13481348
datafusion_functions::register_all(&mut new_self)
13491349
.expect("can not register built in functions");
13501350

1351+
// register crate of array expressions (if enabled)
1352+
#[cfg(feature = "array_expressions")]
1353+
datafusion_functions_array::register_all(&mut new_self)
1354+
.expect("can not register array expressions");
1355+
13511356
new_self
13521357
}
13531358
/// Returns new [`SessionState`] using the provided

datafusion/core/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,12 @@ pub mod functions {
521521
pub use datafusion_functions::*;
522522
}
523523

524+
/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
525+
pub mod functions_array {
526+
#[cfg(feature = "array_expressions")]
527+
pub use datafusion_functions::*;
528+
}
529+
524530
#[cfg(test)]
525531
pub mod test;
526532
pub mod test_util;

datafusion/core/src/prelude.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ pub use datafusion_expr::{
3939
Expr,
4040
};
4141
pub use datafusion_functions::expr_fn::*;
42+
#[cfg(feature = "array_expressions")]
43+
pub use datafusion_functions_array::expr_fn::*;
4244

4345
pub use std::ops::Not;
4446
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};

datafusion/core/tests/dataframe/dataframe_functions.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use arrow::{
2020
array::{Int32Array, StringArray},
2121
record_batch::RecordBatch,
2222
};
23+
use arrow_array::types::Int32Type;
24+
use arrow_array::ListArray;
2325
use arrow_schema::SchemaRef;
2426
use std::sync::Arc;
2527

@@ -40,6 +42,7 @@ fn test_schema() -> SchemaRef {
4042
Arc::new(Schema::new(vec![
4143
Field::new("a", DataType::Utf8, false),
4244
Field::new("b", DataType::Int32, false),
45+
Field::new("l", DataType::new_list(DataType::Int32, true), true),
4346
]))
4447
}
4548

@@ -57,6 +60,12 @@ async fn create_test_table() -> Result<DataFrame> {
5760
"123AbcDef",
5861
])),
5962
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
63+
Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
64+
Some(vec![Some(0), Some(1), Some(2)]),
65+
None,
66+
Some(vec![Some(3), None, Some(5)]),
67+
Some(vec![Some(6), Some(7)]),
68+
])),
6069
],
6170
)?;
6271

@@ -67,7 +76,7 @@ async fn create_test_table() -> Result<DataFrame> {
6776
ctx.table("test").await
6877
}
6978

70-
/// Excutes an expression on the test dataframe as a select.
79+
/// Executes an expression on the test dataframe as a select.
7180
/// Compares formatted output of a record batch with an expected
7281
/// vector of strings, using the assert_batch_eq! macro
7382
macro_rules! assert_fn_batches {
@@ -862,3 +871,22 @@ async fn test_fn_decode() -> Result<()> {
862871

863872
Ok(())
864873
}
874+
875+
#[tokio::test]
876+
async fn test_fn_array_to_string() -> Result<()> {
877+
let expr = array_to_string(col("l"), lit("***"));
878+
879+
let expected = [
880+
"+-------------------------------------+",
881+
"| array_to_string(test.l,Utf8(\"***\")) |",
882+
"+-------------------------------------+",
883+
"| 0***1***2 |",
884+
"| |",
885+
"| 3***5 |",
886+
"| 6***7 |",
887+
"+-------------------------------------+",
888+
];
889+
assert_fn_batches!(expr, expected);
890+
891+
Ok(())
892+
}

datafusion/expr/src/built_in_function.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,6 @@ pub enum BuiltinScalarFunction {
175175
ArrayReverse,
176176
/// array_slice
177177
ArraySlice,
178-
/// array_to_string
179-
ArrayToString,
180178
/// array_intersect
181179
ArrayIntersect,
182180
/// array_union
@@ -434,7 +432,6 @@ impl BuiltinScalarFunction {
434432
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
435433
BuiltinScalarFunction::Flatten => Volatility::Immutable,
436434
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
437-
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
438435
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
439436
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
440437
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
@@ -631,7 +628,6 @@ impl BuiltinScalarFunction {
631628
BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
632629
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
633630
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
634-
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
635631
BuiltinScalarFunction::ArrayIntersect => {
636632
match (input_expr_types[0].clone(), input_expr_types[1].clone()) {
637633
(DataType::Null, DataType::Null) | (DataType::Null, _) => {
@@ -991,9 +987,6 @@ impl BuiltinScalarFunction {
991987
Signature::variadic_any(self.volatility())
992988
}
993989

994-
BuiltinScalarFunction::ArrayToString => {
995-
Signature::variadic_any(self.volatility())
996-
}
997990
BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
998991
BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
999992
BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()),
@@ -1605,12 +1598,6 @@ impl BuiltinScalarFunction {
16051598
}
16061599
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
16071600
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
1608-
BuiltinScalarFunction::ArrayToString => &[
1609-
"array_to_string",
1610-
"list_to_string",
1611-
"array_join",
1612-
"list_join",
1613-
],
16141601
BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"],
16151602
BuiltinScalarFunction::Cardinality => &["cardinality"],
16161603
BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"],

0 commit comments

Comments
 (0)