Skip to content

Commit df7726d

Browse files
committed
ARROW-88: [C++] Refactor usages of parquet_cpp namespace
I also removed an unneeded `Py_XDECREF` from ARROW-30; I didn't want to create a separate patch for that.

Author: Wes McKinney <wesm@apache.org>

Closes #49 from wesm/ARROW-88 and squashes the following commits:

c4d81dc [Wes McKinney] Refactor usages of parquet_cpp namespace
1 parent 80ec2c1 commit df7726d

5 files changed

Lines changed: 39 additions & 39 deletions

File tree

cpp/src/arrow/parquet/parquet-schema-test.cc

Lines changed: 21 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -26,15 +26,17 @@
2626

2727
#include "arrow/parquet/schema.h"
2828

29+
using ParquetType = parquet::Type;
30+
using parquet::LogicalType;
31+
using parquet::Repetition;
32+
using parquet::schema::NodePtr;
33+
using parquet::schema::GroupNode;
34+
using parquet::schema::PrimitiveNode;
35+
2936
namespace arrow {
3037

3138
namespace parquet {
3239

33-
using parquet_cpp::Repetition;
34-
using parquet_cpp::schema::NodePtr;
35-
using parquet_cpp::schema::GroupNode;
36-
using parquet_cpp::schema::PrimitiveNode;
37-
3840
const auto BOOL = std::make_shared<BooleanType>();
3941
const auto UINT8 = std::make_shared<UInt8Type>();
4042
const auto INT32 = std::make_shared<Int32Type>();
@@ -66,7 +68,7 @@ class TestConvertParquetSchema : public ::testing::Test {
6668
}
6769

6870
protected:
69-
parquet_cpp::SchemaDescriptor descr_;
71+
::parquet::SchemaDescriptor descr_;
7072
std::shared_ptr<Schema> result_schema_;
7173
};
7274

@@ -75,40 +77,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
7577
std::vector<std::shared_ptr<Field>> arrow_fields;
7678

7779
parquet_fields.push_back(
78-
PrimitiveNode::Make("boolean", Repetition::REQUIRED, parquet_cpp::Type::BOOLEAN));
80+
PrimitiveNode::Make("boolean", Repetition::REQUIRED, ParquetType::BOOLEAN));
7981
arrow_fields.push_back(std::make_shared<Field>("boolean", BOOL, false));
8082

8183
parquet_fields.push_back(
82-
PrimitiveNode::Make("int32", Repetition::REQUIRED, parquet_cpp::Type::INT32));
84+
PrimitiveNode::Make("int32", Repetition::REQUIRED, ParquetType::INT32));
8385
arrow_fields.push_back(std::make_shared<Field>("int32", INT32, false));
8486

8587
parquet_fields.push_back(
86-
PrimitiveNode::Make("int64", Repetition::REQUIRED, parquet_cpp::Type::INT64));
88+
PrimitiveNode::Make("int64", Repetition::REQUIRED, ParquetType::INT64));
8789
arrow_fields.push_back(std::make_shared<Field>("int64", INT64, false));
8890

8991
parquet_fields.push_back(
90-
PrimitiveNode::Make("float", Repetition::OPTIONAL, parquet_cpp::Type::FLOAT));
92+
PrimitiveNode::Make("float", Repetition::OPTIONAL, ParquetType::FLOAT));
9193
arrow_fields.push_back(std::make_shared<Field>("float", FLOAT));
9294

9395
parquet_fields.push_back(
94-
PrimitiveNode::Make("double", Repetition::OPTIONAL, parquet_cpp::Type::DOUBLE));
96+
PrimitiveNode::Make("double", Repetition::OPTIONAL, ParquetType::DOUBLE));
9597
arrow_fields.push_back(std::make_shared<Field>("double", DOUBLE));
9698

9799
parquet_fields.push_back(
98100
PrimitiveNode::Make("binary", Repetition::OPTIONAL,
99-
parquet_cpp::Type::BYTE_ARRAY));
101+
ParquetType::BYTE_ARRAY));
100102
arrow_fields.push_back(std::make_shared<Field>("binary", BINARY));
101103

102104
parquet_fields.push_back(
103105
PrimitiveNode::Make("string", Repetition::OPTIONAL,
104-
parquet_cpp::Type::BYTE_ARRAY,
105-
parquet_cpp::LogicalType::UTF8));
106+
ParquetType::BYTE_ARRAY,
107+
LogicalType::UTF8));
106108
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));
107109

108110
parquet_fields.push_back(
109111
PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
110-
parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY,
111-
parquet_cpp::LogicalType::NONE, 12));
112+
ParquetType::FIXED_LEN_BYTE_ARRAY,
113+
LogicalType::NONE, 12));
112114
arrow_fields.push_back(std::make_shared<Field>("flba-binary", BINARY));
113115

114116
auto arrow_schema = std::make_shared<Schema>(arrow_fields);
@@ -121,18 +123,18 @@ TEST_F(TestConvertParquetSchema, UnsupportedThings) {
121123
std::vector<NodePtr> unsupported_nodes;
122124

123125
unsupported_nodes.push_back(
124-
PrimitiveNode::Make("int96", Repetition::REQUIRED, parquet_cpp::Type::INT96));
126+
PrimitiveNode::Make("int96", Repetition::REQUIRED, ParquetType::INT96));
125127

126128
unsupported_nodes.push_back(
127129
GroupNode::Make("repeated-group", Repetition::REPEATED, {}));
128130

129131
unsupported_nodes.push_back(
130132
PrimitiveNode::Make("int32", Repetition::OPTIONAL,
131-
parquet_cpp::Type::INT32, parquet_cpp::LogicalType::DATE));
133+
ParquetType::INT32, LogicalType::DATE));
132134

133135
unsupported_nodes.push_back(
134136
PrimitiveNode::Make("int64", Repetition::OPTIONAL,
135-
parquet_cpp::Type::INT64, parquet_cpp::LogicalType::TIMESTAMP_MILLIS));
137+
ParquetType::INT64, LogicalType::TIMESTAMP_MILLIS));
136138

137139
for (const NodePtr& node : unsupported_nodes) {
138140
ASSERT_RAISES(NotImplemented, ConvertSchema({node}));

cpp/src/arrow/parquet/schema.cc

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,13 @@
2424
#include "arrow/util/status.h"
2525
#include "arrow/types/decimal.h"
2626

27-
using parquet_cpp::schema::Node;
28-
using parquet_cpp::schema::NodePtr;
29-
using parquet_cpp::schema::GroupNode;
30-
using parquet_cpp::schema::PrimitiveNode;
27+
using parquet::schema::Node;
28+
using parquet::schema::NodePtr;
29+
using parquet::schema::GroupNode;
30+
using parquet::schema::PrimitiveNode;
3131

32-
using parquet_cpp::LogicalType;
32+
using ParquetType = parquet::Type;
33+
using parquet::LogicalType;
3334

3435
namespace arrow {
3536

@@ -124,30 +125,30 @@ Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
124125
const PrimitiveNode* primitive = static_cast<const PrimitiveNode*>(node.get());
125126

126127
switch (primitive->physical_type()) {
127-
case parquet_cpp::Type::BOOLEAN:
128+
case ParquetType::BOOLEAN:
128129
type = BOOL;
129130
break;
130-
case parquet_cpp::Type::INT32:
131+
case ParquetType::INT32:
131132
RETURN_NOT_OK(FromInt32(primitive, &type));
132133
break;
133-
case parquet_cpp::Type::INT64:
134+
case ParquetType::INT64:
134135
RETURN_NOT_OK(FromInt64(primitive, &type));
135136
break;
136-
case parquet_cpp::Type::INT96:
137+
case ParquetType::INT96:
137138
// TODO: Do we have that type in Arrow?
138139
// type = TypePtr(new Int96Type());
139140
return Status::NotImplemented("int96");
140-
case parquet_cpp::Type::FLOAT:
141+
case ParquetType::FLOAT:
141142
type = FLOAT;
142143
break;
143-
case parquet_cpp::Type::DOUBLE:
144+
case ParquetType::DOUBLE:
144145
type = DOUBLE;
145146
break;
146-
case parquet_cpp::Type::BYTE_ARRAY:
147+
case ParquetType::BYTE_ARRAY:
147148
// TODO: Do we have that type in Arrow?
148149
RETURN_NOT_OK(FromByteArray(primitive, &type));
149150
break;
150-
case parquet_cpp::Type::FIXED_LEN_BYTE_ARRAY:
151+
case ParquetType::FIXED_LEN_BYTE_ARRAY:
151152
RETURN_NOT_OK(FromFLBA(primitive, &type));
152153
break;
153154
}
@@ -157,7 +158,7 @@ Status NodeToField(const NodePtr& node, std::shared_ptr<Field>* out) {
157158
return Status::OK();
158159
}
159160

160-
Status FromParquetSchema(const parquet_cpp::SchemaDescriptor* parquet_schema,
161+
Status FromParquetSchema(const ::parquet::SchemaDescriptor* parquet_schema,
161162
std::shared_ptr<Schema>* out) {
162163
// TODO(wesm): Consider adding an arrow::Schema name attribute, which comes
163164
// from the root Parquet node

cpp/src/arrow/parquet/schema.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,10 @@ class Status;
3131

3232
namespace parquet {
3333

34-
Status NodeToField(const parquet_cpp::schema::NodePtr& node,
34+
Status NodeToField(const ::parquet::schema::NodePtr& node,
3535
std::shared_ptr<Field>* out);
3636

37-
Status FromParquetSchema(const parquet_cpp::SchemaDescriptor* parquet_schema,
37+
Status FromParquetSchema(const ::parquet::SchemaDescriptor* parquet_schema,
3838
std::shared_ptr<Schema>* out);
3939

4040
} // namespace parquet

python/pyarrow/array.pyx

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -359,7 +359,4 @@ cdef class Table:
359359
names.append(frombytes(col.get().name()))
360360
data.append(<object> arr)
361361

362-
# One ref count too many
363-
Py_XDECREF(arr)
364-
365362
return pd.DataFrame(dict(zip(names, data)), columns=names)

python/pyarrow/includes/parquet.pxd

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
from pyarrow.includes.common cimport *
2121

22-
cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
22+
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
2323
cdef cppclass ColumnReader:
2424
pass
2525

0 commit comments

Comments
 (0)