From 3504818e69fe1779e046b267cb7c00cbc8659d40 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Sun, 15 Sep 2024 17:50:59 +0200 Subject: [PATCH] Make make_scalar_function() result candidate for inlining `make_scalar_function` serves as a template to implement functions, abstracting away processing of scalar values. While it's recommended to implement `ScalarUDFImpl` directly, a few functions still use the `make_scalar_function` helper, perhaps because it reduces code verbosity. While `make_scalar_function` is a template function, it's not eligible for effective inlining because of the `Arc` return type. This commit removes `Arc` and unlocks inlining. The change impact was verified manually using `cargo rustc -p datafusion-functions-nested -- --emit asm -C opt-level=2` and comparing the generated for `ArrayDistance::invoke` exemplary function that uses `make_scalar_function`. Only after the changes can `ArrayDistance::invoke` call the `array_distance_inner` directly. The change saves some small overhead on each invocation of a UDF, but doesn't improve per-row performance. --- datafusion/functions-nested/src/utils.rs | 12 +++++++----- datafusion/functions/src/utils.rs | 12 +++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 0765f6cd237d6..b9a75724bcdee 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -32,7 +32,7 @@ use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue}; use core::any::type_name; use datafusion_common::DataFusionError; -use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; +use datafusion_expr::ColumnarValue; macro_rules! downcast_arg { ($ARG:expr, $ARRAY_TYPE:ident) => {{ @@ -60,11 +60,13 @@ pub(crate) fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { } /// array function wrapper that differentiates between scalar (length 1) and array. -pub(crate) fn make_scalar_function(inner: F) -> ScalarFunctionImplementation +pub(crate) fn make_scalar_function( + inner: F, +) -> impl Fn(&[ColumnarValue]) -> Result where - F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, + F: Fn(&[ArrayRef]) -> Result, { - Arc::new(move |args: &[ColumnarValue]| { + move |args: &[ColumnarValue]| { // first, identify if any of the arguments is an Array. If yes, store its `len`, // as any scalar will need to be converted to an array of len `len`. let len = args @@ -87,7 +89,7 @@ where } else { result.map(ColumnarValue::Array) } - }) + } } pub(crate) fn align_array_dimensions( diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index d36c5473ba01d..818b4c64bd20c 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -15,14 +15,12 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - use arrow::array::ArrayRef; use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::function::Hint; -use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; +use datafusion_expr::ColumnarValue; /// Creates a function to identify the optimal return type of a string function given /// the type of its first argument. @@ -80,11 +78,11 @@ get_optimal_return_type!(utf8_to_int_type, DataType::Int64, DataType::Int32); pub(super) fn make_scalar_function( inner: F, hints: Vec, -) -> ScalarFunctionImplementation +) -> impl Fn(&[ColumnarValue]) -> Result where - F: Fn(&[ArrayRef]) -> Result + Sync + Send + 'static, + F: Fn(&[ArrayRef]) -> Result, { - Arc::new(move |args: &[ColumnarValue]| { + move |args: &[ColumnarValue]| { // first, identify if any of the arguments is an Array. If yes, store its `len`, // as any scalar will need to be converted to an array of len `len`. let len = args @@ -119,7 +117,7 @@ where } else { result.map(ColumnarValue::Array) } - }) + } } #[cfg(test)]