@@ -34,7 +34,9 @@ use futures::{future, stream, StreamExt, TryStreamExt};
3434use crate :: datasource:: file_format:: file_compression_type:: {
3535 FileCompressionType , FileTypeExt ,
3636} ;
37- use crate :: datasource:: physical_plan:: { FileScanConfig , FileSinkConfig } ;
37+ use crate :: datasource:: physical_plan:: {
38+ is_plan_streaming, FileScanConfig , FileSinkConfig ,
39+ } ;
3840use crate :: datasource:: {
3941 file_format:: {
4042 arrow:: ArrowFormat , avro:: AvroFormat , csv:: CsvFormat , json:: JsonFormat ,
@@ -894,7 +896,13 @@ impl TableProvider for ListingTable {
894896 output_schema : self . schema ( ) ,
895897 table_partition_cols : self . options . table_partition_cols . clone ( ) ,
896898 writer_mode,
897- unbounded_input : self . options ( ) . infinite_source ,
899+ // A plan can produce a finite number of rows even if it has unbounded sources, like LIMIT
900+ // queries. Thus, we check if the plan is streaming to determine whether the file sink input is
901+ // unbounded. When the `unbounded_input` flag is `true` for the sink, we occasionally call `yield_now`
902+ // to consume data at the input. When the `unbounded_input` flag is `false` (e.g. non-streaming data),
903+ // all of the data at the input is sunk after execution finishes. See discussion for rationale:
904+ // https://github.com/apache/arrow-datafusion/pull/7610#issuecomment-1728979918
905+ unbounded_input : is_plan_streaming ( & input) ?,
898906 single_file_output : self . options . single_file ,
899907 overwrite,
900908 file_type_writer_options,
0 commit comments