@@ -74,6 +74,36 @@ pub fn make_byte_array_reader(
7474 }
7575}
7676
77+ /// Returns an [`ArrayReader`] that decodes the provided byte array column to view types.
78+ pub fn make_byte_view_array_reader (
79+ pages : Box < dyn PageIterator > ,
80+ column_desc : ColumnDescPtr ,
81+ arrow_type : Option < ArrowType > ,
82+ ) -> Result < Box < dyn ArrayReader > > {
83+ // Check if Arrow type is specified, else create it from Parquet type
84+ let data_type = match arrow_type {
85+ Some ( t) => t,
86+ None => match parquet_to_arrow_field ( column_desc. as_ref ( ) ) ?. data_type ( ) {
87+ ArrowType :: Utf8 | ArrowType :: Utf8View => ArrowType :: Utf8View ,
88+ _ => ArrowType :: BinaryView ,
89+ } ,
90+ } ;
91+
92+ match data_type {
93+ ArrowType :: BinaryView | ArrowType :: Utf8View => {
94+ let reader = GenericRecordReader :: new ( column_desc) ;
95+ Ok ( Box :: new ( ByteArrayReader :: < i32 > :: new (
96+ pages, data_type, reader,
97+ ) ) )
98+ }
99+
100+ _ => Err ( general_err ! (
101+ "invalid data type for byte array reader read to view type - {}" ,
102+ data_type
103+ ) ) ,
104+ }
105+ }
106+
77107/// An [`ArrayReader`] for variable length byte arrays
78108struct ByteArrayReader < I : OffsetSizeTrait > {
79109 data_type : ArrowType ,
@@ -588,7 +618,7 @@ mod tests {
588618 use super :: * ;
589619 use crate :: arrow:: array_reader:: test_util:: { byte_array_all_encodings, utf8_column} ;
590620 use crate :: arrow:: record_reader:: buffer:: ValuesBuffer ;
591- use arrow_array:: { Array , StringArray } ;
621+ use arrow_array:: { Array , StringArray , StringViewArray } ;
592622 use arrow_buffer:: Buffer ;
593623
594624 #[ test]
@@ -646,6 +676,64 @@ mod tests {
646676 }
647677 }
648678
679+ #[ test]
680+ fn test_byte_array_string_view_decoder ( ) {
681+ let ( pages, encoded_dictionary) =
682+ byte_array_all_encodings ( vec ! [ "hello" , "world" , "large payload over 12 bytes" , "b" ] ) ;
683+
684+ let column_desc = utf8_column ( ) ;
685+ let mut decoder = ByteArrayColumnValueDecoder :: new ( & column_desc) ;
686+
687+ decoder
688+ . set_dict ( encoded_dictionary, 4 , Encoding :: RLE_DICTIONARY , false )
689+ . unwrap ( ) ;
690+
691+ for ( encoding, page) in pages {
692+ let mut output = OffsetBuffer :: < i32 > :: default ( ) ;
693+ decoder. set_data ( encoding, page, 4 , Some ( 4 ) ) . unwrap ( ) ;
694+
695+ assert_eq ! ( decoder. read( & mut output, 1 ) . unwrap( ) , 1 ) ;
696+
697+ assert_eq ! ( output. values. as_slice( ) , "hello" . as_bytes( ) ) ;
698+ assert_eq ! ( output. offsets. as_slice( ) , & [ 0 , 5 ] ) ;
699+
700+ assert_eq ! ( decoder. read( & mut output, 1 ) . unwrap( ) , 1 ) ;
701+ assert_eq ! ( output. values. as_slice( ) , "helloworld" . as_bytes( ) ) ;
702+ assert_eq ! ( output. offsets. as_slice( ) , & [ 0 , 5 , 10 ] ) ;
703+
704+ assert_eq ! ( decoder. read( & mut output, 2 ) . unwrap( ) , 2 ) ;
705+ assert_eq ! (
706+ output. values. as_slice( ) ,
707+ "helloworldlarge payload over 12 bytesb" . as_bytes( )
708+ ) ;
709+ assert_eq ! ( output. offsets. as_slice( ) , & [ 0 , 5 , 10 , 37 , 38 ] ) ;
710+
711+ assert_eq ! ( decoder. read( & mut output, 4 ) . unwrap( ) , 0 ) ;
712+
713+ let valid = [ false , false , true , true , false , true , true , false , false ] ;
714+ let valid_buffer = Buffer :: from_iter ( valid. iter ( ) . cloned ( ) ) ;
715+
716+ output. pad_nulls ( 0 , 4 , valid. len ( ) , valid_buffer. as_slice ( ) ) ;
717+ let array = output. into_array ( Some ( valid_buffer) , ArrowType :: Utf8View ) ;
718+ let strings = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) . unwrap ( ) ;
719+
720+ assert_eq ! (
721+ strings. iter( ) . collect:: <Vec <_>>( ) ,
722+ vec![
723+ None ,
724+ None ,
725+ Some ( "hello" ) ,
726+ Some ( "world" ) ,
727+ None ,
728+ Some ( "large payload over 12 bytes" ) ,
729+ Some ( "b" ) ,
730+ None ,
731+ None ,
732+ ]
733+ ) ;
734+ }
735+ }
736+
649737 #[ test]
650738 fn test_byte_array_decoder_skip ( ) {
651739 let ( pages, encoded_dictionary) =
@@ -690,6 +778,60 @@ mod tests {
690778 }
691779 }
692780
781+ #[ test]
782+ fn test_byte_array_string_view_decoder_skip ( ) {
783+ let ( pages, encoded_dictionary) =
784+ byte_array_all_encodings ( vec ! [ "hello" , "world" , "a" , "large payload over 12 bytes" ] ) ;
785+
786+ let column_desc = utf8_column ( ) ;
787+ let mut decoder = ByteArrayColumnValueDecoder :: new ( & column_desc) ;
788+
789+ decoder
790+ . set_dict ( encoded_dictionary, 4 , Encoding :: RLE_DICTIONARY , false )
791+ . unwrap ( ) ;
792+
793+ for ( encoding, page) in pages {
794+ let mut output = OffsetBuffer :: < i32 > :: default ( ) ;
795+ decoder. set_data ( encoding, page, 4 , Some ( 4 ) ) . unwrap ( ) ;
796+
797+ assert_eq ! ( decoder. read( & mut output, 1 ) . unwrap( ) , 1 ) ;
798+
799+ assert_eq ! ( output. values. as_slice( ) , "hello" . as_bytes( ) ) ;
800+ assert_eq ! ( output. offsets. as_slice( ) , & [ 0 , 5 ] ) ;
801+
802+ assert_eq ! ( decoder. skip_values( 1 ) . unwrap( ) , 1 ) ;
803+ assert_eq ! ( decoder. skip_values( 1 ) . unwrap( ) , 1 ) ;
804+
805+ assert_eq ! ( decoder. read( & mut output, 1 ) . unwrap( ) , 1 ) ;
806+ assert_eq ! (
807+ output. values. as_slice( ) ,
808+ "hellolarge payload over 12 bytes" . as_bytes( )
809+ ) ;
810+ assert_eq ! ( output. offsets. as_slice( ) , & [ 0 , 5 , 32 ] ) ;
811+
812+ assert_eq ! ( decoder. read( & mut output, 4 ) . unwrap( ) , 0 ) ;
813+
814+ let valid = [ false , false , true , true , false , false ] ;
815+ let valid_buffer = Buffer :: from_iter ( valid. iter ( ) . cloned ( ) ) ;
816+
817+ output. pad_nulls ( 0 , 2 , valid. len ( ) , valid_buffer. as_slice ( ) ) ;
818+ let array = output. into_array ( Some ( valid_buffer) , ArrowType :: Utf8View ) ;
819+ let strings = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) . unwrap ( ) ;
820+
821+ assert_eq ! (
822+ strings. iter( ) . collect:: <Vec <_>>( ) ,
823+ vec![
824+ None ,
825+ None ,
826+ Some ( "hello" ) ,
827+ Some ( "large payload over 12 bytes" ) ,
828+ None ,
829+ None ,
830+ ]
831+ ) ;
832+ }
833+ }
834+
693835 #[ test]
694836 fn test_byte_array_decoder_nulls ( ) {
695837 let ( pages, encoded_dictionary) = byte_array_all_encodings ( Vec :: < & str > :: new ( ) ) ;
0 commit comments