File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1818#pragma once
1919
2020#include < memory>
21+ #include < string>
2122#include < vector>
2223
2324#include " arrow/dataset/type_fwd.h"
@@ -54,15 +55,9 @@ struct DataSelector {
5455// / DataFragments
5556class ARROW_DS_EXPORT DataSource {
5657 public:
57- enum Type {
58- SIMPLE, // Flat collection
59- PARTITIONED, // Partitioned collection
60- GENERIC // All others
61- };
62-
6358 virtual ~DataSource () = default ;
6459
65- virtual Type type () const = 0;
60+ virtual std::string type () const = 0;
6661
6762 virtual std::unique_ptr<DataFragmentIterator> GetFragments (
6863 const DataSelector& selector) = 0;
Original file line number Diff line number Diff line change @@ -117,9 +117,35 @@ class ARROW_DS_EXPORT HivePartitionScheme : public PartitionScheme {
117117// ----------------------------------------------------------------------
118118//
119119
120+ // Partitioned datasets come in different forms. Here is an example of
121+ // a Hive-style partitioned dataset:
122+ //
123+ // dataset_root/
124+ // key1=$k1_v1/
125+ // key2=$k2_v1/
126+ // 0.parquet
127+ // 1.parquet
128+ // 2.parquet
129+ // 3.parquet
130+ // key2=$k2_v2/
131+ // 0.parquet
132+ // 1.parquet
133+ // key1=$k1_v2/
134+ // key2=$k2_v1/
135+ // 0.parquet
136+ // 1.parquet
137+ // key2=$k2_v2/
138+ // 0.parquet
139+ // 1.parquet
140+ // 2.parquet
141+ //
142+ // In this case, the dataset has 11 fragments (11 files) to be
143+ // scanned, or potentially more if it is configured to split Parquet
144+ // files at the row-group level.
145+
120146class ARROW_DS_EXPORT Partition : public DataSource {
121147 public:
122- DataSource::Type type () const override ;
148+ std::string type () const override ;
123149
124150 // / \brief The key for this partition source; it may be nullptr,
125151 // / e.g. for the top-level partitioned source container
@@ -129,7 +155,7 @@ class ARROW_DS_EXPORT Partition : public DataSource {
129155 const Selector& selector) = 0;
130156};
131157
132- // / \brief Container for a dataset partition , which consists of a
158+ // / \brief Simple implementation of Partition, which consists of a
133159// / partition identifier, subpartitions, and some data fragments
134160class ARROW_DS_EXPORT SimplePartition : public Partition {
135161 public:
You can’t perform that action at this time.
0 commit comments