Skip to content

Commit 2f6440a

Browse files
committed
Remove not-currently-needed enum, add comment about an example partition structure
1 parent 68712f8 commit 2f6440a

2 files changed

Lines changed: 30 additions & 9 deletions

File tree

cpp/src/arrow/dataset/dataset.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#pragma once
1919

2020
#include <memory>
21+
#include <string>
2122
#include <vector>
2223

2324
#include "arrow/dataset/type_fwd.h"
@@ -54,15 +55,9 @@ struct DataSelector {
5455
/// DataFragments
5556
class ARROW_DS_EXPORT DataSource {
5657
public:
57-
enum Type {
58-
SIMPLE, // Flat collection
59-
PARTITIONED, // Partitioned collection
60-
GENERIC // All others
61-
};
62-
6358
virtual ~DataSource() = default;
6459

65-
virtual Type type() const = 0;
60+
virtual std::string type() const = 0;
6661

6762
virtual std::unique_ptr<DataFragmentIterator> GetFragments(
6863
const DataSelector& selector) = 0;

cpp/src/arrow/dataset/partition.h

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,35 @@ class ARROW_DS_EXPORT HivePartitionScheme : public PartitionScheme {
117117
// ----------------------------------------------------------------------
118118
//
119119

120+
// Partitioned datasets come in different forms. Here is an example of
121+
// a Hive-style partitioned dataset:
122+
//
123+
// dataset_root/
124+
//   key1=$k1_v1/
125+
//     key2=$k2_v1/
126+
//       0.parquet
127+
//       1.parquet
128+
//       2.parquet
129+
//       3.parquet
130+
//     key2=$k2_v2/
131+
//       0.parquet
132+
//       1.parquet
133+
//   key1=$k1_v2/
134+
//     key2=$k2_v1/
135+
//       0.parquet
136+
//       1.parquet
137+
//     key2=$k2_v2/
138+
//       0.parquet
139+
//       1.parquet
140+
//       2.parquet
141+
//
142+
// In this case, the dataset has 11 fragments (11 files) to be
143+
// scanned, or potentially more if it is configured to split Parquet
144+
// files at the row group level.
145+
120146
class ARROW_DS_EXPORT Partition : public DataSource {
121147
public:
122-
DataSource::Type type() const override;
148+
std::string type() const override;
123149

124150
/// \brief The key for this partition source; may be nullptr,
125151
/// e.g. for the top-level partitioned source container
@@ -129,7 +155,7 @@ class ARROW_DS_EXPORT Partition : public DataSource {
129155
const Selector& selector) = 0;
130156
};
131157

132-
/// \brief Container for a dataset partition, which consists of a
158+
/// \brief Simple implementation of Partition, which consists of a
133159
/// partition identifier, subpartitions, and some data fragments
134160
class ARROW_DS_EXPORT SimplePartition : public Partition {
135161
public:

0 commit comments

Comments
 (0)