@@ -22,6 +22,7 @@ use std::{any::Any, convert::TryFrom};
2222use super :: ArrayDataRef ;
2323use super :: * ;
2424use crate :: array:: equal_json:: JsonEqual ;
25+ use crate :: buffer:: { Buffer , MutableBuffer } ;
2526use crate :: error:: Result ;
2627use crate :: ffi;
2728
@@ -326,6 +327,170 @@ pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
326327 let data = ArrayData :: new_empty ( data_type) ;
327328 make_array ( Arc :: new ( data) )
328329}
330+ /// Creates a new array of `data_type` of length `length` filled entirely of `NULL` values
331+ pub fn new_null_array ( data_type : & DataType , length : usize ) -> ArrayRef {
332+ // context: https://github.com/apache/arrow/pull/9469#discussion_r574761687
333+ match data_type {
334+ DataType :: Null => Arc :: new ( NullArray :: new ( length) ) ,
335+ DataType :: Boolean => {
336+ let null_buf: Buffer = MutableBuffer :: new_null ( length) . into ( ) ;
337+ make_array ( Arc :: new ( ArrayData :: new (
338+ data_type. clone ( ) ,
339+ length,
340+ Some ( length) ,
341+ Some ( null_buf. clone ( ) ) ,
342+ 0 ,
343+ vec ! [ null_buf] ,
344+ vec ! [ ] ,
345+ ) ) )
346+ }
347+ DataType :: Int8 => new_null_sized_array :: < Int8Type > ( data_type, length) ,
348+ DataType :: UInt8 => new_null_sized_array :: < UInt8Type > ( data_type, length) ,
349+ DataType :: Int16 => new_null_sized_array :: < Int16Type > ( data_type, length) ,
350+ DataType :: UInt16 => new_null_sized_array :: < UInt16Type > ( data_type, length) ,
351+ DataType :: Float16 => unreachable ! ( ) ,
352+ DataType :: Int32 => new_null_sized_array :: < Int32Type > ( data_type, length) ,
353+ DataType :: UInt32 => new_null_sized_array :: < UInt32Type > ( data_type, length) ,
354+ DataType :: Float32 => new_null_sized_array :: < Float32Type > ( data_type, length) ,
355+ DataType :: Date32 => new_null_sized_array :: < Date32Type > ( data_type, length) ,
356+ // expanding this into Date23{unit}Type results in needless branching
357+ DataType :: Time32 ( _) => new_null_sized_array :: < Int32Type > ( data_type, length) ,
358+ DataType :: Int64 => new_null_sized_array :: < Int64Type > ( data_type, length) ,
359+ DataType :: UInt64 => new_null_sized_array :: < UInt64Type > ( data_type, length) ,
360+ DataType :: Float64 => new_null_sized_array :: < Float64Type > ( data_type, length) ,
361+ DataType :: Date64 => new_null_sized_array :: < Date64Type > ( data_type, length) ,
362+ // expanding this into Timestamp{unit}Type results in needless branching
363+ DataType :: Timestamp ( _, _) => new_null_sized_array :: < Int64Type > ( data_type, length) ,
364+ DataType :: Time64 ( _) => new_null_sized_array :: < Int64Type > ( data_type, length) ,
365+ DataType :: Duration ( _) => new_null_sized_array :: < Int64Type > ( data_type, length) ,
366+ DataType :: Interval ( unit) => match unit {
367+ IntervalUnit :: YearMonth => {
368+ new_null_sized_array :: < IntervalYearMonthType > ( data_type, length)
369+ }
370+ IntervalUnit :: DayTime => {
371+ new_null_sized_array :: < IntervalDayTimeType > ( data_type, length)
372+ }
373+ } ,
374+ DataType :: FixedSizeBinary ( value_len) => make_array ( Arc :: new ( ArrayData :: new (
375+ data_type. clone ( ) ,
376+ length,
377+ Some ( length) ,
378+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
379+ 0 ,
380+ vec ! [ Buffer :: from( vec![ 0u8 ; * value_len as usize * length] ) ] ,
381+ vec ! [ ] ,
382+ ) ) ) ,
383+ DataType :: Binary | DataType :: Utf8 => {
384+ new_null_binary_array :: < i32 > ( data_type, length)
385+ }
386+ DataType :: LargeBinary | DataType :: LargeUtf8 => {
387+ new_null_binary_array :: < i64 > ( data_type, length)
388+ }
389+ DataType :: List ( field) => {
390+ new_null_list_array :: < i32 > ( data_type, field. data_type ( ) , length)
391+ }
392+ DataType :: LargeList ( field) => {
393+ new_null_list_array :: < i64 > ( data_type, field. data_type ( ) , length)
394+ }
395+ DataType :: FixedSizeList ( field, value_len) => {
396+ make_array ( Arc :: new ( ArrayData :: new (
397+ data_type. clone ( ) ,
398+ length,
399+ Some ( length) ,
400+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
401+ 0 ,
402+ vec ! [ ] ,
403+ vec ! [
404+ new_null_array( field. data_type( ) , * value_len as usize * length)
405+ . data( ) ,
406+ ] ,
407+ ) ) )
408+ }
409+ DataType :: Struct ( fields) => make_array ( Arc :: new ( ArrayData :: new (
410+ data_type. clone ( ) ,
411+ length,
412+ Some ( length) ,
413+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
414+ 0 ,
415+ vec ! [ ] ,
416+ fields
417+ . iter ( )
418+ . map ( |field| Arc :: new ( ArrayData :: new_empty ( field. data_type ( ) ) ) )
419+ . collect ( ) ,
420+ ) ) ) ,
421+ DataType :: Union ( _) => {
422+ unimplemented ! ( "Creating null Union array not yet supported" )
423+ }
424+ DataType :: Dictionary ( _, value) => {
425+ make_array ( Arc :: new ( ArrayData :: new (
426+ data_type. clone ( ) ,
427+ length,
428+ Some ( length) ,
429+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
430+ 0 ,
431+ vec ! [ MutableBuffer :: new( 0 ) . into( ) ] , // values are empty
432+ vec ! [ new_empty_array( value. as_ref( ) ) . data( ) ] ,
433+ ) ) )
434+ }
435+ DataType :: Decimal ( _, _) => {
436+ unimplemented ! ( "Creating null Decimal array not yet supported" )
437+ }
438+ }
439+ }
440+
441+ #[ inline]
442+ fn new_null_list_array < OffsetSize : OffsetSizeTrait > (
443+ data_type : & DataType ,
444+ child_data_type : & DataType ,
445+ length : usize ,
446+ ) -> ArrayRef {
447+ make_array ( Arc :: new ( ArrayData :: new (
448+ data_type. clone ( ) ,
449+ length,
450+ Some ( length) ,
451+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
452+ 0 ,
453+ vec ! [ Buffer :: from(
454+ vec![ OffsetSize :: zero( ) ; length + 1 ] . to_byte_slice( ) ,
455+ ) ] ,
456+ vec ! [ Arc :: new( ArrayData :: new_empty( child_data_type) ) ] ,
457+ ) ) )
458+ }
459+
460+ #[ inline]
461+ fn new_null_binary_array < OffsetSize : OffsetSizeTrait > (
462+ data_type : & DataType ,
463+ length : usize ,
464+ ) -> ArrayRef {
465+ make_array ( Arc :: new ( ArrayData :: new (
466+ data_type. clone ( ) ,
467+ length,
468+ Some ( length) ,
469+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
470+ 0 ,
471+ vec ! [
472+ Buffer :: from( vec![ OffsetSize :: zero( ) ; length + 1 ] . to_byte_slice( ) ) ,
473+ MutableBuffer :: new( 0 ) . into( ) ,
474+ ] ,
475+ vec ! [ ] ,
476+ ) ) )
477+ }
478+
479+ #[ inline]
480+ fn new_null_sized_array < T : ArrowPrimitiveType > (
481+ data_type : & DataType ,
482+ length : usize ,
483+ ) -> ArrayRef {
484+ make_array ( Arc :: new ( ArrayData :: new (
485+ data_type. clone ( ) ,
486+ length,
487+ Some ( length) ,
488+ Some ( MutableBuffer :: new_null ( length) . into ( ) ) ,
489+ 0 ,
490+ vec ! [ Buffer :: from( vec![ 0u8 ; length * T :: get_byte_width( ) ] ) ] ,
491+ vec ! [ ] ,
492+ ) ) )
493+ }
329494
330495/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
331496/// # Safety
@@ -409,4 +574,60 @@ mod tests {
409574 assert_eq ! ( a. len( ) , 0 ) ;
410575 assert_eq ! ( a. value_offsets( ) [ 0 ] , 0i32 ) ;
411576 }
577+
/// A null `Boolean` array has the requested length and every slot is null.
#[test]
fn test_null_boolean() {
    let nulls = new_null_array(&DataType::Boolean, 9);
    let typed = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();

    assert_eq!(typed.len(), 9);
    (0..9).for_each(|slot| assert!(typed.is_null(slot)));
}
587+
/// A null `Int32` array has the requested length and every slot is null.
#[test]
fn test_null_primitive() {
    let nulls = new_null_array(&DataType::Int32, 9);
    let typed = nulls.as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(typed.len(), 9);
    (0..9).for_each(|slot| assert!(typed.is_null(slot)));
}
597+
/// A null `Utf8` array has the requested length, a zero final offset, and every
/// slot is null.
#[test]
fn test_null_variable_sized() {
    let nulls = new_null_array(&DataType::Utf8, 9);
    let typed = nulls.as_any().downcast_ref::<StringArray>().unwrap();

    assert_eq!(typed.len(), 9);
    // the last offset being zero means no value bytes are referenced
    assert_eq!(typed.value_offsets()[9], 0i32);
    (0..9).for_each(|slot| assert!(typed.is_null(slot)));
}
608+
/// A null `List<Int32>` array has the requested length, a zero final offset, and
/// every slot is null.
#[test]
fn test_null_list_primitive() {
    let item = Field::new("item", DataType::Int32, true);
    let list_type = DataType::List(Box::new(item));

    let nulls = new_null_array(&list_type, 9);
    let typed = nulls.as_any().downcast_ref::<ListArray>().unwrap();

    assert_eq!(typed.len(), 9);
    // the last offset being zero means no child items are referenced
    assert_eq!(typed.value_offsets()[9], 0i32);
    (0..9).for_each(|slot| assert!(typed.is_null(slot)));
}
621+
/// A null dictionary array compares equal to a dictionary array collected from
/// nine `None` values.
#[test]
fn test_null_dictionary() {
    // Reference array: nine `None` entries collected into an `Int8`-keyed dictionary.
    let collected: DictionaryArray<Int8Type> =
        std::iter::repeat(None::<&str>).take(9).collect();
    let expected: ArrayRef = Arc::new(collected);

    let actual = new_null_array(expected.data_type(), 9);
    assert_eq!(&expected, &actual);
}
412633}
0 commit comments