Skip to content

Commit 5b74c2d

Browse files
authored
Improved ergonomy for CREATE EXTERNAL TABLE OPTIONS: Don't require quotations for simple namespaced keys like foo.bar (#10483)
* Don't require quotations for simple namespaced keys like foo.bar * Add comments clarifying parse error cases for unquoted namespaced keys
1 parent adf0bfc commit 5b74c2d

6 files changed

Lines changed: 84 additions & 70 deletions

File tree

datafusion/common/src/config.rs

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ macro_rules! config_namespace {
130130
$(
131131
stringify!($field_name) => self.$field_name.set(rem, value),
132132
)*
133-
_ => return Err(DataFusionError::Configuration(format!(
133+
_ => return _config_err!(
134134
"Config value \"{}\" not found on {}", key, stringify!($struct_name)
135-
)))
135+
)
136136
}
137137
}
138138

@@ -676,22 +676,17 @@ impl ConfigOptions {
676676

677677
/// Set a configuration option
678678
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
679-
let (prefix, key) = key.split_once('.').ok_or_else(|| {
680-
DataFusionError::Configuration(format!(
681-
"could not find config namespace for key \"{key}\"",
682-
))
683-
})?;
679+
let Some((prefix, key)) = key.split_once('.') else {
680+
return _config_err!("could not find config namespace for key \"{key}\"");
681+
};
684682

685683
if prefix == "datafusion" {
686684
return ConfigField::set(self, key, value);
687685
}
688686

689-
let e = self.extensions.0.get_mut(prefix);
690-
let e = e.ok_or_else(|| {
691-
DataFusionError::Configuration(format!(
692-
"Could not find config namespace \"{prefix}\""
693-
))
694-
})?;
687+
let Some(e) = self.extensions.0.get_mut(prefix) else {
688+
return _config_err!("Could not find config namespace \"{prefix}\"");
689+
};
695690
e.0.set(key, value)
696691
}
697692

@@ -1279,22 +1274,17 @@ impl TableOptions {
12791274
///
12801275
/// A result indicating success or failure in setting the configuration option.
12811276
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
1282-
let (prefix, _) = key.split_once('.').ok_or_else(|| {
1283-
DataFusionError::Configuration(format!(
1284-
"could not find config namespace for key \"{key}\""
1285-
))
1286-
})?;
1277+
let Some((prefix, _)) = key.split_once('.') else {
1278+
return _config_err!("could not find config namespace for key \"{key}\"");
1279+
};
12871280

12881281
if prefix == "format" {
12891282
return ConfigField::set(self, key, value);
12901283
}
12911284

1292-
let e = self.extensions.0.get_mut(prefix);
1293-
let e = e.ok_or_else(|| {
1294-
DataFusionError::Configuration(format!(
1295-
"Could not find config namespace \"{prefix}\""
1296-
))
1297-
})?;
1285+
let Some(e) = self.extensions.0.get_mut(prefix) else {
1286+
return _config_err!("Could not find config namespace \"{prefix}\"");
1287+
};
12981288
e.0.set(key, value)
12991289
}
13001290

@@ -1413,19 +1403,19 @@ impl ConfigField for TableParquetOptions {
14131403
fn set(&mut self, key: &str, value: &str) -> Result<()> {
14141404
// Determine if the key is a global, metadata, or column-specific setting
14151405
if key.starts_with("metadata::") {
1416-
let k =
1417-
match key.split("::").collect::<Vec<_>>()[..] {
1418-
[_meta] | [_meta, ""] => return Err(DataFusionError::Configuration(
1406+
let k = match key.split("::").collect::<Vec<_>>()[..] {
1407+
[_meta] | [_meta, ""] => {
1408+
return _config_err!(
14191409
"Invalid metadata key provided, missing key in metadata::<key>"
1420-
.to_string(),
1421-
)),
1422-
[_meta, k] => k.into(),
1423-
_ => {
1424-
return Err(DataFusionError::Configuration(format!(
1410+
)
1411+
}
1412+
[_meta, k] => k.into(),
1413+
_ => {
1414+
return _config_err!(
14251415
"Invalid metadata key provided, found too many '::' in \"{key}\""
1426-
)))
1427-
}
1428-
};
1416+
)
1417+
}
1418+
};
14291419
self.key_value_metadata.insert(k, Some(value.into()));
14301420
Ok(())
14311421
} else if key.contains("::") {
@@ -1498,10 +1488,7 @@ macro_rules! config_namespace_with_hashmap {
14981488

14991489
inner_value.set(inner_key, value)
15001490
}
1501-
_ => Err(DataFusionError::Configuration(format!(
1502-
"Unrecognized key '{}'.",
1503-
key
1504-
))),
1491+
_ => _config_err!("Unrecognized key '{key}'."),
15051492
}
15061493
}
15071494

datafusion/core/src/execution/context/mod.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ use std::ops::ControlFlow;
2323
use std::sync::{Arc, Weak};
2424

2525
use super::options::ReadOptions;
26+
#[cfg(feature = "array_expressions")]
27+
use crate::functions_array;
2628
use crate::{
2729
catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA},
2830
catalog::listing_schema::ListingSchemaProvider,
@@ -53,53 +55,49 @@ use crate::{
5355
},
5456
optimizer::analyzer::{Analyzer, AnalyzerRule},
5557
optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
58+
physical_expr::{create_physical_expr, PhysicalExpr},
5659
physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
5760
physical_plan::ExecutionPlan,
5861
physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
5962
variable::{VarProvider, VarType},
6063
};
61-
62-
#[cfg(feature = "array_expressions")]
63-
use crate::functions_array;
6464
use crate::{functions, functions_aggregate};
6565

6666
use arrow::datatypes::{DataType, SchemaRef};
6767
use arrow::record_batch::RecordBatch;
6868
use arrow_schema::Schema;
69-
use async_trait::async_trait;
70-
use chrono::{DateTime, Utc};
71-
use datafusion_common::tree_node::TreeNode;
7269
use datafusion_common::{
7370
alias::AliasGenerator,
7471
config::{ConfigExtension, TableOptions},
7572
exec_err, not_impl_err, plan_datafusion_err, plan_err,
76-
tree_node::{TreeNodeRecursion, TreeNodeVisitor},
73+
tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
7774
DFSchema, SchemaReference, TableReference,
7875
};
7976
use datafusion_execution::registry::SerializerRegistry;
8077
use datafusion_expr::{
78+
expr_rewriter::FunctionRewrite,
8179
logical_plan::{DdlStatement, Statement},
80+
simplify::SimplifyInfo,
8281
var_provider::is_system_variables,
8382
Expr, ExprSchemable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
8483
};
84+
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
8585
use datafusion_sql::{
8686
parser::{CopyToSource, CopyToStatement, DFParser},
8787
planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel},
8888
ResolvedTableReference,
8989
};
90-
use parking_lot::RwLock;
9190
use sqlparser::dialect::dialect_from_str;
91+
92+
use async_trait::async_trait;
93+
use chrono::{DateTime, Utc};
94+
use parking_lot::RwLock;
9295
use url::Url;
9396
use uuid::Uuid;
9497

95-
use crate::physical_expr::PhysicalExpr;
9698
pub use datafusion_execution::config::SessionConfig;
9799
pub use datafusion_execution::TaskContext;
98100
pub use datafusion_expr::execution_props::ExecutionProps;
99-
use datafusion_expr::expr_rewriter::FunctionRewrite;
100-
use datafusion_expr::simplify::SimplifyInfo;
101-
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
102-
use datafusion_physical_expr::create_physical_expr;
103101

104102
mod avro;
105103
mod csv;

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::any::Any;
19+
use std::collections::HashMap;
20+
use std::fmt::{self, Debug, Formatter};
21+
use std::sync::Arc;
22+
use std::vec;
23+
1824
use arrow::array::{ArrayRef, FixedSizeListArray};
1925
use arrow::datatypes::{
2026
DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType,
@@ -24,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory;
2430
use datafusion::datasource::TableProvider;
2531
use datafusion::execution::context::SessionState;
2632
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
33+
use datafusion::execution::FunctionRegistry;
2734
use datafusion::functions_aggregate::covariance::{covar_pop, covar_samp};
2835
use datafusion::functions_aggregate::expr_fn::first_value;
2936
use datafusion::prelude::*;
@@ -51,16 +58,11 @@ use datafusion_proto::bytes::{
5158
logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec,
5259
};
5360
use datafusion_proto::logical_plan::to_proto::serialize_expr;
54-
use datafusion_proto::logical_plan::LogicalExtensionCodec;
55-
use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec};
61+
use datafusion_proto::logical_plan::{
62+
from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec,
63+
};
5664
use datafusion_proto::protobuf;
57-
use std::any::Any;
58-
use std::collections::HashMap;
59-
use std::fmt::{self, Debug, Formatter};
60-
use std::sync::Arc;
61-
use std::vec;
6265

63-
use datafusion::execution::FunctionRegistry;
6466
use prost::Message;
6567

6668
#[cfg(feature = "json")]

datafusion/sql/src/parser.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,21 @@ impl<'a> DFParser<'a> {
462462
pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
463463
let next_token = self.parser.next_token();
464464
match next_token.token {
465-
Token::Word(Word { value, .. }) => Ok(value),
465+
Token::Word(Word { value, .. }) => {
466+
let mut parts = vec![value];
467+
while self.parser.consume_token(&Token::Period) {
468+
let next_token = self.parser.next_token();
469+
if let Token::Word(Word { value, .. }) = next_token.token {
470+
parts.push(value);
471+
} else {
472+
// Unquoted namespaced keys have to conform to the syntax
473+
// "<WORD>[\.<WORD>]*". If we have a key that breaks this
474+
// pattern, error out:
475+
return self.parser.expected("key name", next_token);
476+
}
477+
}
478+
Ok(parts.join("."))
479+
}
466480
Token::SingleQuotedString(s) => Ok(s),
467481
Token::DoubleQuotedString(s) => Ok(s),
468482
Token::EscapedStringLiteral(s) => Ok(s),
@@ -712,15 +726,15 @@ impl<'a> DFParser<'a> {
712726
} else {
713727
self.parser.expect_keyword(Keyword::HEADER)?;
714728
self.parser.expect_keyword(Keyword::ROW)?;
715-
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
729+
return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)");
716730
}
717731
}
718732
Keyword::DELIMITER => {
719-
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')");
733+
return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')");
720734
}
721735
Keyword::COMPRESSION => {
722736
self.parser.expect_keyword(Keyword::TYPE)?;
723-
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
737+
return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)");
724738
}
725739
Keyword::PARTITIONED => {
726740
self.parser.expect_keyword(Keyword::BY)?;
@@ -933,7 +947,7 @@ mod tests {
933947
expect_parse_ok(sql, expected)?;
934948

935949
// positive case with delimiter
936-
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.delimiter' '|')";
950+
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
937951
let display = None;
938952
let expected = Statement::CreateExternalTable(CreateExternalTable {
939953
name: "t".into(),

datafusion/sqllogictest/test_files/create_external_table.slt

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ LOCATION 'test_files/scratch/create_external_table/manual_partitioning/';
190190
statement error DataFusion error: Error during planning: Option format.delimiter is specified multiple times
191191
CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS (
192192
'format.delimiter' '*',
193-
'format.has_header' 'true',
194-
'format.delimiter' '|')
193+
'format.has_header' 'true',
194+
'format.delimiter' '|')
195195
LOCATION 'foo.csv';
196196

197197
# If a config does not belong to any namespace, we assume it is a 'format' option and apply the 'format' prefix for backwards compatibility.
@@ -201,7 +201,20 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
201201
r_name VARCHAR,
202202
r_comment VARCHAR,
203203
r_rev VARCHAR,
204-
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
204+
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
205205
OPTIONS (
206206
'format.delimiter' '|',
207-
'has_header' 'false');
207+
'has_header' 'false');
208+
209+
# Verify that we do not need quotations for simple namespaced keys.
210+
statement ok
211+
CREATE EXTERNAL TABLE IF NOT EXISTS region (
212+
r_regionkey BIGINT,
213+
r_name VARCHAR,
214+
r_comment VARCHAR,
215+
r_rev VARCHAR,
216+
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
217+
OPTIONS (
218+
format.delimiter '|',
219+
has_header false,
220+
compression gzip);

datafusion/sqllogictest/test_files/tpch/create_tables.slt.part

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
121121
r_name VARCHAR,
122122
r_comment VARCHAR,
123123
r_rev VARCHAR,
124-
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');
124+
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|');

0 commit comments

Comments
 (0)