Skip to content

Commit 5353c28

Browse files
mqynevi-me
authored and committed
ARROW-10832: [Rust] [Arrow] generate src/ipc/gen/* with latest snapshot flatc.
I regenerated the code with a snapshot build of flatc, because flatc 1.12.0 does not work. Both `cargo test` and `cargo doc` passed. Closes #8859 from mqy/master. Authored-by: mqy <meng.qingyou@gmail.com> Signed-off-by: Neville Dipale <nevilledips@gmail.com>
1 parent 7a22ba9 commit 5353c28

10 files changed

Lines changed: 2399 additions & 858 deletions

File tree

rust/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ Cargo.lock
22
target
33
rusty-tags.vi
44
.history
5+
.flatbuffers/

rust/arrow/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@ cargo run --example read_csv
6868

6969
## IPC
7070

71-
The IPC flatbuffer code was generated by running this command from the root of the project, using flatc version 1.10.0:
71+
The expected flatc version is 1.12.0+, built from [flatbuffers](https://github.com/google/flatbuffers)
72+
master at a fixed commit ID, by `regen.sh`.
73+
74+
The IPC flatbuffer code was generated by running this command from the root of the project:
7275

7376
```bash
7477
./regen.sh

rust/arrow/regen.sh

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/bin/bash -e
22
# Licensed to the Apache Software Foundation (ASF) under one
33
# or more contributor license agreements. See the NOTICE file
44
# distributed with this work for additional information
@@ -21,8 +21,43 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
2121
# Change to the toplevel Rust directory
2222
pushd $DIR/../../
2323

24+
# As of 2020-12-06, the snapshot flatc version has not changed since "1.12.0",
25+
# so let's build flatc from source.
26+
27+
FB_URL="https://github.com/google/flatbuffers"
28+
FB_COMMIT="2046bffa40400904c926c2a5bedab67a8d6b7e08"
29+
FB_DIR="rust/arrow/.flatbuffers"
30+
FLATC="$FB_DIR/bazel-bin/flatc"
31+
32+
if [ ! -e "$FLATC" ]; then
33+
echo "$FLATC: not found, let's build it ..."
34+
35+
if [ -z $(which bazel) ]; then
36+
echo "bazel is required to build flatc"
37+
exit 1
38+
fi
39+
40+
echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"
41+
42+
if [ ! -e $FB_DIR ]; then
43+
echo "git clone $FB_URL ..."
44+
git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
45+
else
46+
echo "git pull $FB_URL ..."
47+
git -C $FB_DIR pull
48+
fi
49+
50+
echo "hard reset to $FB_COMMIT"
51+
git -C $FB_DIR reset --hard $FB_COMMIT
52+
53+
pushd $FB_DIR
54+
echo "run: bazel build :flatc ..."
55+
bazel build :flatc
56+
popd
57+
fi
58+
2459
# Execute the code generation:
25-
flatc --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
60+
$FLATC --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
2661

2762
# Now the files are wrongly named so we have to change that.
2863
popd
@@ -63,6 +98,8 @@ SCHEMA_IMPORT="\nuse crate::ipc::gen::Schema::*;"
6398
SPARSE_TENSOR_IMPORT="\nuse crate::ipc::gen::SparseTensor::*;"
6499
TENSOR_IMPORT="\nuse crate::ipc::gen::Tensor::*;"
65100

101+
# For flatbuffer(1.12.0+), remove: use crate::${name}_generated::\*;
102+
names=("File" "Message" "Schema" "SparseTensor" "Tensor")
66103

67104
# Remove all generated lines we don't need
68105
for f in `ls *.rs`; do
@@ -86,6 +123,12 @@ for f in `ls *.rs`; do
86123
sed -i '' '/use std::mem;/d' $f
87124
sed -i '' '/use std::cmp::Ordering;/d' $f
88125

126+
# required by flatc 1.12.0+
127+
sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
128+
for name in ${names[@]}; do
129+
sed -i '' "/use crate::${name}_generated::\*;/d" $f
130+
done
131+
89132
# Replace all occurrences of type__ with type_
90133
sed -i '' 's/type__/type_/g' $f
91134

@@ -106,4 +149,19 @@ done
106149

107150
# Return back to base directory
108151
popd
109-
cargo +stable fmt -- src/ipc/gen/*
152+
cargo +stable fmt -- src/ipc/gen/*
153+
154+
echo "=== TIPS ==="
155+
echo "Let's manually fix rustdoc of SparseTensorIndexCSF::indptrType:"
156+
echo 'prepend the tree with ```text, and append the tree with ```'
157+
cat <<TREE_EOF
158+
/// \`\`\`text
159+
/// 0 1
160+
/// / \ |
161+
/// 0 1 1
162+
/// / / \ |
163+
/// 0 0 1 1
164+
/// /| /| | /| |
165+
/// 1 2 0 2 0 0 1 2
166+
/// \`\`\`
167+
TREE_EOF

rust/arrow/src/ipc/convert.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,10 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
166166
(32, false) => DataType::UInt32,
167167
(64, true) => DataType::Int64,
168168
(64, false) => DataType::UInt64,
169-
_ => panic!("Unexpected bitwidth and signed"),
169+
z => panic!(
170+
"Int type with bit width of {} and signed of {} not supported",
171+
z.0, z.1
172+
),
170173
}
171174
}
172175
ipc::Type::Binary => DataType::Binary,
@@ -183,13 +186,15 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
183186
ipc::Precision::HALF => DataType::Float16,
184187
ipc::Precision::SINGLE => DataType::Float32,
185188
ipc::Precision::DOUBLE => DataType::Float64,
189+
z => panic!("FloatingPoint type with precision of {:?} not supported", z),
186190
}
187191
}
188192
ipc::Type::Date => {
189193
let date = field.type_as_date().unwrap();
190194
match date.unit() {
191195
ipc::DateUnit::DAY => DataType::Date32(DateUnit::Day),
192196
ipc::DateUnit::MILLISECOND => DataType::Date64(DateUnit::Millisecond),
197+
z => panic!("Date type with unit of {:?} not supported", z),
193198
}
194199
}
195200
ipc::Type::Time => {
@@ -224,6 +229,7 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
224229
ipc::TimeUnit::NANOSECOND => {
225230
DataType::Timestamp(TimeUnit::Nanosecond, timezone)
226231
}
232+
z => panic!("Timestamp type with unit of {:?} not supported", z),
227233
}
228234
}
229235
ipc::Type::Interval => {
@@ -233,6 +239,7 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
233239
DataType::Interval(IntervalUnit::YearMonth)
234240
}
235241
ipc::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime),
242+
z => panic!("Interval type with unit of {:?} unsupported", z),
236243
}
237244
}
238245
ipc::Type::Duration => {
@@ -242,6 +249,7 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT
242249
ipc::TimeUnit::MILLISECOND => DataType::Duration(TimeUnit::Millisecond),
243250
ipc::TimeUnit::MICROSECOND => DataType::Duration(TimeUnit::Microsecond),
244251
ipc::TimeUnit::NANOSECOND => DataType::Duration(TimeUnit::Nanosecond),
252+
z => panic!("Duration type with unit of {:?} unsupported", z),
245253
}
246254
}
247255
ipc::Type::List => {

rust/arrow/src/ipc/gen/File.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,23 @@ use std::{cmp::Ordering, mem};
2525

2626
// struct Block, aligned to 8
2727
#[repr(C, align(8))]
28-
#[derive(Clone, Copy, Debug, PartialEq)]
28+
#[derive(Clone, Copy, PartialEq)]
2929
pub struct Block {
3030
offset_: i64,
3131
metaDataLength_: i32,
3232
padding0__: u32,
3333
bodyLength_: i64,
3434
} // pub struct Block
35+
impl std::fmt::Debug for Block {
36+
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
37+
f.debug_struct("Block")
38+
.field("offset", &self.offset())
39+
.field("metaDataLength", &self.metaDataLength())
40+
.field("bodyLength", &self.bodyLength())
41+
.finish()
42+
}
43+
}
44+
3545
impl flatbuffers::SafeSliceAccess for Block {}
3646
impl<'a> flatbuffers::Follow<'a> for Block {
3747
type Inner = &'a Block;
@@ -52,7 +62,7 @@ impl<'b> flatbuffers::Push for Block {
5262
#[inline]
5363
fn push(&self, dst: &mut [u8], _rest: &[u8]) {
5464
let src = unsafe {
55-
std::slice::from_raw_parts(self as *const Block as *const u8, Self::size())
65+
::std::slice::from_raw_parts(self as *const Block as *const u8, Self::size())
5666
};
5767
dst.copy_from_slice(src);
5868
}
@@ -63,14 +73,14 @@ impl<'b> flatbuffers::Push for &'b Block {
6373
#[inline]
6474
fn push(&self, dst: &mut [u8], _rest: &[u8]) {
6575
let src = unsafe {
66-
std::slice::from_raw_parts(*self as *const Block as *const u8, Self::size())
76+
::std::slice::from_raw_parts(*self as *const Block as *const u8, Self::size())
6777
};
6878
dst.copy_from_slice(src);
6979
}
7080
}
7181

7282
impl Block {
73-
pub fn new<'a>(_offset: i64, _metaDataLength: i32, _bodyLength: i64) -> Self {
83+
pub fn new(_offset: i64, _metaDataLength: i32, _bodyLength: i64) -> Self {
7484
Block {
7585
offset_: _offset.to_little_endian(),
7686
metaDataLength_: _metaDataLength.to_little_endian(),
@@ -80,22 +90,22 @@ impl Block {
8090
}
8191
}
8292
/// Index to the start of the RecordBlock (note this is past the Message header)
83-
pub fn offset<'a>(&'a self) -> i64 {
93+
pub fn offset(&self) -> i64 {
8494
self.offset_.from_little_endian()
8595
}
8696
/// Length of the metadata
87-
pub fn metaDataLength<'a>(&'a self) -> i32 {
97+
pub fn metaDataLength(&self) -> i32 {
8898
self.metaDataLength_.from_little_endian()
8999
}
90100
/// Length of the data (this is aligned so there can be a gap between this and
91101
/// the metadata).
92-
pub fn bodyLength<'a>(&'a self) -> i64 {
102+
pub fn bodyLength(&self) -> i64 {
93103
self.bodyLength_.from_little_endian()
94104
}
95105
}
96106

97107
pub enum FooterOffset {}
98-
#[derive(Copy, Clone, Debug, PartialEq)]
108+
#[derive(Copy, Clone, PartialEq)]
99109

100110
/// ----------------------------------------------------------------------
101111
/// Arrow File metadata
@@ -109,7 +119,7 @@ impl<'a> flatbuffers::Follow<'a> for Footer<'a> {
109119
#[inline]
110120
fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
111121
Self {
112-
_tab: flatbuffers::Table { buf: buf, loc: loc },
122+
_tab: flatbuffers::Table { buf, loc },
113123
}
114124
}
115125
}
@@ -280,6 +290,17 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
280290
}
281291
}
282292

293+
impl std::fmt::Debug for Footer<'_> {
294+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
295+
let mut ds = f.debug_struct("Footer");
296+
ds.field("version", &self.version());
297+
ds.field("schema", &self.schema());
298+
ds.field("dictionaries", &self.dictionaries());
299+
ds.field("recordBatches", &self.recordBatches());
300+
ds.field("custom_metadata", &self.custom_metadata());
301+
ds.finish()
302+
}
303+
}
283304
#[inline]
284305
pub fn get_root_as_footer<'a>(buf: &'a [u8]) -> Footer<'a> {
285306
flatbuffers::get_root::<Footer<'a>>(buf)

0 commit comments

Comments
 (0)