Skip to content

Commit f50300e

Browse files
committed
Defer file creation to write
1 parent 5909866 commit f50300e

6 files changed

Lines changed: 17 additions & 28 deletions

File tree

datafusion/core/src/datasource/listing/url.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ impl ListingTableUrl {
116116
/// Get object store for specified input_url
117117
/// if input_url is actually not a url, we assume it is a local file path
118118
/// if we have a local path, create it if not exists so ListingTableUrl::parse works
119+
#[deprecated(note = "Use parse")]
119120
pub fn parse_create_local_if_not_exists(
120121
s: impl AsRef<str>,
121122
is_directory: bool,

datafusion/core/src/datasource/listing_table_factory.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,18 @@ impl TableProviderFactory for ListingTableFactory {
148148
.unwrap_or(false)
149149
};
150150

151-
let create_local_path = statement_options
152-
.take_bool_option("create_local_path")?
153-
.unwrap_or(false);
154151
let single_file = statement_options
155152
.take_bool_option("single_file")?
156153
.unwrap_or(false);
157154

158155
// Backwards compatibility
159156
if let Some(s) = statement_options.take_str_option("insert_mode") {
160157
if !s.eq_ignore_ascii_case("append_new_files") {
161-
return plan_err!("Unknown or unsupported insert mode {s}. Only append_to_file supported");
158+
return plan_err!("Unknown or unsupported insert mode {s}. Only append_new_files supported");
162159
}
163160
}
161+
statement_options.take_bool_option("create_local_path")?;
162+
164163
let file_type = file_format.file_type();
165164

166165
// Use remaining options and session state to build FileTypeWriterOptions
@@ -199,13 +198,7 @@ impl TableProviderFactory for ListingTableFactory {
199198
FileType::AVRO => file_type_writer_options,
200199
};
201200

202-
let table_path = match create_local_path {
203-
true => ListingTableUrl::parse_create_local_if_not_exists(
204-
&cmd.location,
205-
!single_file,
206-
),
207-
false => ListingTableUrl::parse(&cmd.location),
208-
}?;
201+
let table_path = ListingTableUrl::parse(&cmd.location)?;
209202

210203
let options = ListingOptions::new(file_format)
211204
.with_collect_stat(state.config().collect_statistics())

datafusion/core/src/datasource/stream.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,15 @@ impl StreamConfig {
179179
match &self.encoding {
180180
StreamEncoding::Csv => {
181181
let header = self.header && !self.location.exists();
182-
let file = OpenOptions::new().append(true).open(&self.location)?;
182+
let file = OpenOptions::new().create(true).append(true).open(&self.location)?;
183183
let writer = arrow::csv::WriterBuilder::new()
184184
.with_header(header)
185185
.build(file);
186186

187187
Ok(Box::new(writer))
188188
}
189189
StreamEncoding::Json => {
190-
let file = OpenOptions::new().append(true).open(&self.location)?;
190+
let file = OpenOptions::new().create(true).append(true).open(&self.location)?;
191191
Ok(Box::new(arrow::json::LineDelimitedWriter::new(file)))
192192
}
193193
}

datafusion/core/src/physical_planner.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -571,11 +571,7 @@ impl DefaultPhysicalPlanner {
571571
copy_options,
572572
}) => {
573573
let input_exec = self.create_initial_plan(input, session_state).await?;
574-
575-
// TODO: make this behavior configurable via options (should copy to create path/file as needed?)
576-
// TODO: add additional configurable options for if existing files should be overwritten or
577-
// appended to
578-
let parsed_url = ListingTableUrl::parse_create_local_if_not_exists(output_url, !*single_file_output)?;
574+
let parsed_url = ListingTableUrl::parse(output_url)?;
579575
let object_store_url = parsed_url.object_store();
580576

581577
let schema: Schema = (**input.schema()).clone().into();

datafusion/sqllogictest/test_files/insert_to_external.slt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ CREATE EXTERNAL TABLE dictionary_encoded_parquet_partitioned(
5757
b varchar,
5858
)
5959
STORED AS parquet
60-
LOCATION 'test_files/scratch/insert_to_external/parquet_types_partitioned'
60+
LOCATION 'test_files/scratch/insert_to_external/parquet_types_partitioned/'
6161
PARTITIONED BY (b)
6262
OPTIONS(
6363
create_local_path 'true',
@@ -292,7 +292,7 @@ statement ok
292292
CREATE EXTERNAL TABLE
293293
directory_test(a bigint, b bigint)
294294
STORED AS parquet
295-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0'
295+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0/'
296296
OPTIONS(
297297
create_local_path 'true',
298298
);
@@ -312,7 +312,7 @@ statement ok
312312
CREATE EXTERNAL TABLE
313313
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
314314
STORED AS parquet
315-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1'
315+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1/'
316316
OPTIONS (create_local_path 'true');
317317

318318
query TT
@@ -378,7 +378,7 @@ statement ok
378378
CREATE EXTERNAL TABLE
379379
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
380380
STORED AS parquet
381-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2'
381+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2/'
382382
OPTIONS (create_local_path 'true');
383383

384384
query TT
@@ -423,7 +423,7 @@ statement ok
423423
CREATE EXTERNAL TABLE
424424
table_without_values(c1 varchar NULL)
425425
STORED AS parquet
426-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3'
426+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3/'
427427
OPTIONS (create_local_path 'true');
428428

429429
# verify that the sort order of the insert query is maintained into the
@@ -462,7 +462,7 @@ statement ok
462462
CREATE EXTERNAL TABLE
463463
table_without_values(id BIGINT, name varchar)
464464
STORED AS parquet
465-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4'
465+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4/'
466466
OPTIONS (create_local_path 'true');
467467

468468
query IT
@@ -505,7 +505,7 @@ statement ok
505505
CREATE EXTERNAL TABLE
506506
table_without_values(field1 BIGINT NOT NULL, field2 BIGINT NULL)
507507
STORED AS parquet
508-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5'
508+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5/'
509509
OPTIONS (create_local_path 'true');
510510

511511
query II
@@ -555,7 +555,7 @@ CREATE EXTERNAL TABLE test_column_defaults(
555555
d text default lower('DEFAULT_TEXT'),
556556
e timestamp default now()
557557
) STORED AS parquet
558-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q6'
558+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q6/'
559559
OPTIONS (create_local_path 'true');
560560

561561
# fill in all column values
@@ -608,5 +608,5 @@ CREATE EXTERNAL TABLE test_column_defaults(
608608
a int,
609609
b int default a+1
610610
) STORED AS parquet
611-
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q7'
611+
LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q7/'
612612
OPTIONS (create_local_path 'true');

docs/source/user-guide/sql/write_options.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ The following special options are specific to creating an external table.
8181
| Option | Description | Default Value |
8282
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------- |
8383
| SINGLE_FILE | If true, indicates that this external table is backed by a single file. INSERT INTO queries will append to this file. | false |
84-
| CREATE_LOCAL_PATH | If true, the folder or file backing this table will be created on the local file system if it does not already exist when running INSERT INTO queries. | false |
8584
| INSERT_MODE | Determines if INSERT INTO queries should append to existing files or append new files to an existing directory. Valid values are append_to_file, append_new_files, and error. Note that "error" will block inserting data into this table. | CSV and JSON default to append_to_file. Parquet defaults to append_new_files |
8685

8786
### JSON Format Specific Options

0 commit comments

Comments
 (0)