Skip to content

Commit 2556f04

Browse files
committed
add allow null type coercion parameter
1 parent 1ce1ffd commit 2556f04

6 files changed

Lines changed: 78 additions & 36 deletions

File tree

datafusion/expr/src/built_in_function.rs

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -924,51 +924,63 @@ impl BuiltinScalarFunction {
924924
Signature::variadic_any(self.volatility())
925925
}
926926
BuiltinScalarFunction::ArrayAppend => {
927-
Signature::array_and_element(self.volatility())
927+
Signature::array_and_element(false, self.volatility())
928928
}
929929
BuiltinScalarFunction::MakeArray => {
930930
// 0 or more arguments of arbitrary type
931931
Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility())
932932
}
933-
BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()),
934-
BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()),
933+
BuiltinScalarFunction::ArrayPopFront => {
934+
Signature::array(false, self.volatility())
935+
}
936+
BuiltinScalarFunction::ArrayPopBack => {
937+
Signature::array(false, self.volatility())
938+
}
935939
BuiltinScalarFunction::ArrayConcat => {
936940
Signature::variadic_any(self.volatility())
937941
}
938-
BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()),
939-
BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()),
942+
BuiltinScalarFunction::ArrayDims => {
943+
Signature::array(false, self.volatility())
944+
}
945+
BuiltinScalarFunction::ArrayEmpty => {
946+
Signature::array(false, self.volatility())
947+
}
940948
BuiltinScalarFunction::ArrayElement => {
941-
Signature::array_and_index(self.volatility())
949+
Signature::array_and_index(false, self.volatility())
942950
}
943951
BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()),
944-
BuiltinScalarFunction::Flatten => Signature::array(self.volatility()),
952+
BuiltinScalarFunction::Flatten => Signature::array(false, self.volatility()),
945953
BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => {
946954
Signature::any(2, self.volatility())
947955
}
948956
BuiltinScalarFunction::ArrayHas => {
949-
Signature::array_and_element(self.volatility())
957+
Signature::array_and_element(false, self.volatility())
950958
}
951959
BuiltinScalarFunction::ArrayLength => {
952960
Signature::variadic_any(self.volatility())
953961
}
954-
BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()),
955-
BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()),
962+
BuiltinScalarFunction::ArrayNdims => {
963+
Signature::array(false, self.volatility())
964+
}
965+
BuiltinScalarFunction::ArrayDistinct => {
966+
Signature::array(true, self.volatility())
967+
}
956968
BuiltinScalarFunction::ArrayPosition => {
957969
Signature::variadic_any(self.volatility())
958970
}
959971
BuiltinScalarFunction::ArrayPositions => {
960-
Signature::array_and_element(self.volatility())
972+
Signature::array_and_element(false, self.volatility())
961973
}
962974
BuiltinScalarFunction::ArrayPrepend => {
963-
Signature::element_and_array(self.volatility())
975+
Signature::element_and_array(false, self.volatility())
964976
}
965977
BuiltinScalarFunction::ArrayRepeat => Signature::any(2, self.volatility()),
966978
BuiltinScalarFunction::ArrayRemove => {
967-
Signature::array_and_element(self.volatility())
979+
Signature::array_and_element(false, self.volatility())
968980
}
969981
BuiltinScalarFunction::ArrayRemoveN => Signature::any(3, self.volatility()),
970982
BuiltinScalarFunction::ArrayRemoveAll => {
971-
Signature::array_and_element(self.volatility())
983+
Signature::array_and_element(false, self.volatility())
972984
}
973985
BuiltinScalarFunction::ArrayReplace => Signature::any(3, self.volatility()),
974986
BuiltinScalarFunction::ArrayReplaceN => Signature::any(4, self.volatility()),
@@ -985,7 +997,9 @@ impl BuiltinScalarFunction {
985997
}
986998
BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
987999
BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
988-
BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()),
1000+
BuiltinScalarFunction::Cardinality => {
1001+
Signature::array(false, self.volatility())
1002+
}
9891003
BuiltinScalarFunction::ArrayResize => {
9901004
Signature::variadic_any(self.volatility())
9911005
}

datafusion/expr/src/signature.rs

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ pub enum TypeSignature {
122122
/// is `OneOf(vec![Any(0), VariadicAny])`.
123123
OneOf(Vec<TypeSignature>),
124124
/// Specifies Signatures for array functions
125-
ArraySignature(ArrayFunctionSignature),
125+
/// Boolean value specifies whether null type coercion is allowed
126+
ArraySignature(ArrayFunctionSignature, bool),
126127
}
127128

128129
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -144,13 +145,19 @@ pub enum ArrayFunctionSignature {
144145
}
145146

146147
impl ArrayFunctionSignature {
148+
/// Arguments to ArrayFunctionSignature
149+
/// `current_types` - The data types of the arguments
150+
/// `allow_null_coercion` - Whether null type coercion is allowed
151+
/// Returns the valid types for the function signature
147152
pub fn get_type_signature(
148153
&self,
149154
current_types: &[DataType],
155+
allow_null_coercion: bool,
150156
) -> Result<Vec<Vec<DataType>>> {
151157
fn array_append_or_prepend_valid_types(
152158
current_types: &[DataType],
153159
is_append: bool,
160+
allow_null_coercion: bool,
154161
) -> Result<Vec<Vec<DataType>>> {
155162
if current_types.len() != 2 {
156163
return Ok(vec![vec![]]);
@@ -163,7 +170,7 @@ impl ArrayFunctionSignature {
163170
};
164171

165172
// We follow Postgres on `array_append(Null, T)`, which is not valid unless null coercion is explicitly allowed.
166-
if array_type.eq(&DataType::Null) {
173+
if array_type.eq(&DataType::Null) && !allow_null_coercion {
167174
return Ok(vec![vec![]]);
168175
}
169176

@@ -215,8 +222,13 @@ impl ArrayFunctionSignature {
215222
_ => Ok(vec![vec![]]),
216223
}
217224
}
218-
fn array(current_types: &[DataType]) -> Result<Vec<Vec<DataType>>> {
219-
if current_types.len() != 1 {
225+
fn array(
226+
current_types: &[DataType],
227+
allow_null_coercion: bool,
228+
) -> Result<Vec<Vec<DataType>>> {
229+
if current_types.len() != 1
230+
|| (current_types[0].is_null() && !allow_null_coercion)
231+
{
220232
return Ok(vec![vec![]]);
221233
}
222234

@@ -229,7 +241,6 @@ impl ArrayFunctionSignature {
229241
let array_type = coerced_fixed_size_list_to_list(array_type);
230242
Ok(vec![vec![array_type]])
231243
}
232-
DataType::Null => Ok(vec![vec![array_type.to_owned()]]),
233244
_ => Ok(vec![vec![DataType::List(Arc::new(Field::new(
234245
"item",
235246
array_type.to_owned(),
@@ -239,13 +250,21 @@ impl ArrayFunctionSignature {
239250
}
240251
match self {
241252
ArrayFunctionSignature::ArrayAndElement => {
242-
array_append_or_prepend_valid_types(current_types, true)
253+
array_append_or_prepend_valid_types(
254+
current_types,
255+
true,
256+
allow_null_coercion,
257+
)
243258
}
244259
ArrayFunctionSignature::ElementAndArray => {
245-
array_append_or_prepend_valid_types(current_types, false)
260+
array_append_or_prepend_valid_types(
261+
current_types,
262+
false,
263+
allow_null_coercion,
264+
)
246265
}
247266
ArrayFunctionSignature::ArrayAndIndex => array_and_index(current_types),
248-
ArrayFunctionSignature::Array => array(current_types),
267+
ArrayFunctionSignature::Array => array(current_types, allow_null_coercion),
249268
}
250269
}
251270
}
@@ -297,7 +316,7 @@ impl TypeSignature {
297316
TypeSignature::OneOf(sigs) => {
298317
sigs.iter().flat_map(|s| s.to_string_repr()).collect()
299318
}
300-
TypeSignature::ArraySignature(array_signature) => {
319+
TypeSignature::ArraySignature(array_signature, _) => {
301320
vec![array_signature.to_string()]
302321
}
303322
}
@@ -402,36 +421,42 @@ impl Signature {
402421
}
403422
}
404423
/// Specialized Signature for ArrayAppend and similar functions
405-
pub fn array_and_element(volatility: Volatility) -> Self {
424+
pub fn array_and_element(allow_null_coercion: bool, volatility: Volatility) -> Self {
406425
Signature {
407426
type_signature: TypeSignature::ArraySignature(
408427
ArrayFunctionSignature::ArrayAndElement,
428+
allow_null_coercion,
409429
),
410430
volatility,
411431
}
412432
}
413433
/// Specialized Signature for ArrayPrepend and similar functions
414-
pub fn element_and_array(volatility: Volatility) -> Self {
434+
pub fn element_and_array(allow_null_coercion: bool, volatility: Volatility) -> Self {
415435
Signature {
416436
type_signature: TypeSignature::ArraySignature(
417437
ArrayFunctionSignature::ElementAndArray,
438+
allow_null_coercion,
418439
),
419440
volatility,
420441
}
421442
}
422443
/// Specialized Signature for ArrayElement and similar functions
423-
pub fn array_and_index(volatility: Volatility) -> Self {
444+
pub fn array_and_index(allow_null_coercion: bool, volatility: Volatility) -> Self {
424445
Signature {
425446
type_signature: TypeSignature::ArraySignature(
426447
ArrayFunctionSignature::ArrayAndIndex,
448+
allow_null_coercion,
427449
),
428450
volatility,
429451
}
430452
}
431453
/// Specialized Signature for ArrayEmpty and similar functions
432-
pub fn array(volatility: Volatility) -> Self {
454+
pub fn array(allow_null_coercion: bool, volatility: Volatility) -> Self {
433455
Signature {
434-
type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
456+
type_signature: TypeSignature::ArraySignature(
457+
ArrayFunctionSignature::Array,
458+
allow_null_coercion,
459+
),
435460
volatility,
436461
}
437462
}

datafusion/expr/src/type_coercion/binary.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
301301
.or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
302302
.or_else(|| temporal_coercion(lhs_type, rhs_type))
303303
.or_else(|| string_coercion(lhs_type, rhs_type))
304-
.or_else(|| null_coercion(lhs_type, rhs_type))
304+
.or_else(|| allow_null_coercion(lhs_type, rhs_type))
305305
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
306306
.or_else(|| string_temporal_coercion(lhs_type, rhs_type))
307307
.or_else(|| binary_coercion(lhs_type, rhs_type))
@@ -756,7 +756,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataTyp
756756
string_coercion(lhs_type, rhs_type)
757757
.or_else(|| binary_to_string_coercion(lhs_type, rhs_type))
758758
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
759-
.or_else(|| null_coercion(lhs_type, rhs_type))
759+
.or_else(|| allow_null_coercion(lhs_type, rhs_type))
760760
}
761761

762762
/// coercion rules for regular expression comparison operations.
@@ -844,7 +844,7 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataTyp
844844

845845
/// Coercion rules from the NULL type. Since NULL can be cast to any other type in arrow,
846846
/// if either lhs or rhs is NULL and NULL can be cast to the type of the other side, the coercion is valid.
847-
fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
847+
fn allow_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
848848
match (lhs_type, rhs_type) {
849849
(DataType::Null, other_type) | (other_type, DataType::Null) => {
850850
if can_cast_types(&DataType::Null, other_type) {

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ fn get_valid_types(
110110
}
111111

112112
TypeSignature::Exact(valid_types) => vec![valid_types.clone()],
113-
TypeSignature::ArraySignature(ref function_signature) => {
114-
function_signature.get_type_signature(current_types)?
113+
TypeSignature::ArraySignature(ref function_signature, allow_null_coercion) => {
114+
function_signature.get_type_signature(current_types, *allow_null_coercion)?
115115
}
116116

117117
TypeSignature::Any(number) => {

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2659,7 +2659,7 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
26592659
}
26602660

26612661
// handle for list & largelist
2662-
match args[0].data_type() {
2662+
match dbg!(args[0].data_type()) {
26632663
DataType::List(field) => {
26642664
let array = as_list_array(&args[0])?;
26652665
general_array_distinct(array, field)

datafusion/sqllogictest/test_files/array.slt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4253,7 +4253,10 @@ NULL [3] [4]
42534253
# array_ndims scalar function #1
42544254

42554255
query error
4256-
selrct array_ndims(1), array_ndims(null)
4256+
select array_ndims(1)
4257+
4258+
query error
4259+
select array_ndims(null)
42574260

42584261
query I
42594262
select

0 commit comments

Comments
 (0)