Skip to content

Commit 8df8328

Browse files
rdmellowesm
authored and committed
PARQUET-1482: [C++] Adding basic unit test for DataPageV2 serialization and deserialization.
1 parent 9df3222 commit 8df8328

1 file changed

Lines changed: 46 additions & 0 deletions

File tree

cpp/src/parquet/file-deserialize-test.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,21 @@ class TestPageSerde : public ::testing::Test {
9090
ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get()));
9191
}
9292

93+
void WriteDataPageHeaderV2(int max_serialized_len = 1024, int32_t uncompressed_size = 0,
94+
int32_t compressed_size = 0) {
95+
// Simplifying writing serialized data page V2 headers which may or may not
96+
// have meaningful data associated with them
97+
98+
// Serialize the Page header
99+
page_header_.__set_data_page_header_v2(data_page_header_v2_);
100+
page_header_.uncompressed_page_size = uncompressed_size;
101+
page_header_.compressed_page_size = compressed_size;
102+
page_header_.type = format::PageType::DATA_PAGE_V2;
103+
104+
ThriftSerializer serializer;
105+
ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get()));
106+
}
107+
93108
void ResetStream() { out_stream_.reset(new InMemoryOutputStream); }
94109

95110
void EndStream() { out_buffer_ = out_stream_->GetBuffer(); }
@@ -101,6 +116,7 @@ class TestPageSerde : public ::testing::Test {
101116
std::unique_ptr<PageReader> page_reader_;
102117
format::PageHeader page_header_;
103118
format::DataPageHeader data_page_header_;
119+
format::DataPageHeaderV2 data_page_header_v2_;
104120
};
105121

106122
void CheckDataPageHeader(const format::DataPageHeader expected, const Page* page) {
@@ -120,6 +136,24 @@ void CheckDataPageHeader(const format::DataPageHeader expected, const Page* page
120136
}
121137
}
122138

139+
// Overload for DataPageV2 tests.
140+
void CheckDataPageHeader(const format::DataPageHeaderV2 expected, const Page* page) {
141+
ASSERT_EQ(PageType::DATA_PAGE_V2, page->type());
142+
143+
const DataPageV2* data_page = static_cast<const DataPageV2*>(page);
144+
ASSERT_EQ(expected.num_values, data_page->num_values());
145+
ASSERT_EQ(expected.num_nulls, data_page->num_nulls());
146+
ASSERT_EQ(expected.num_rows, data_page->num_rows());
147+
ASSERT_EQ(expected.encoding, data_page->encoding());
148+
ASSERT_EQ(expected.definition_levels_byte_length,
149+
data_page->definition_levels_byte_length());
150+
ASSERT_EQ(expected.repetition_levels_byte_length,
151+
data_page->repetition_levels_byte_length());
152+
ASSERT_EQ(expected.is_compressed, data_page->is_compressed());
153+
154+
// TODO: Tests for DataPageHeaderV2 statistics.
155+
}
156+
123157
TEST_F(TestPageSerde, DataPage) {
124158
format::PageHeader out_page_header;
125159

@@ -134,6 +168,18 @@ TEST_F(TestPageSerde, DataPage) {
134168
ASSERT_NO_FATAL_FAILURE(CheckDataPageHeader(data_page_header_, current_page.get()));
135169
}
136170

171+
// Round-trips a DataPageV2 header: serializes the fixture's V2 header, reads
// it back through the page reader, and checks the fields of the resulting page.
TEST_F(TestPageSerde, DataPageV2) {
  const int32_t num_rows = 4444;

  // Populate the V2 header, which is the one WriteDataPageHeaderV2() serializes
  // and CheckDataPageHeader() compares against. Writing to the V1
  // data_page_header_ here would leave the V2 header at its defaults, so the
  // round trip would only ever compare default values.
  data_page_header_v2_.num_values = num_rows;
  data_page_header_v2_.num_rows = num_rows;

  ASSERT_NO_FATAL_FAILURE(WriteDataPageHeaderV2());
  InitSerializedPageReader(num_rows);

  std::shared_ptr<Page> current_page = page_reader_->NextPage();
  // CheckDataPageHeader dereferences the page, so fail cleanly if none was read.
  ASSERT_TRUE(current_page != nullptr);
  ASSERT_NO_FATAL_FAILURE(CheckDataPageHeader(data_page_header_v2_, current_page.get()));
}
182+
137183
TEST_F(TestPageSerde, TestLargePageHeaders) {
138184
int stats_size = 256 * 1024; // 256 KB
139185
AddDummyStats(stats_size, data_page_header_);

0 commit comments

Comments
 (0)