1717
1818use crate :: { Expr , LogicalPlan , SortExpr , Volatility } ;
1919use std:: cmp:: Ordering ;
20- use std:: collections:: HashMap ;
20+ use std:: collections:: { BTreeSet , HashMap , HashSet } ;
2121use std:: sync:: Arc ;
2222use std:: {
2323 fmt:: { self , Display } ,
@@ -28,7 +28,8 @@ use crate::expr::Sort;
2828use arrow:: datatypes:: DataType ;
2929use datafusion_common:: tree_node:: { Transformed , TreeNodeContainer , TreeNodeRecursion } ;
3030use datafusion_common:: {
31- Constraints , DFSchemaRef , Result , SchemaReference , TableReference ,
31+ schema_err, Column , Constraints , DFSchema , DFSchemaRef , Result ,
32+ SchemaError , SchemaReference , TableReference ,
3233} ;
3334use sqlparser:: ast:: Ident ;
3435
@@ -306,6 +307,7 @@ impl CreateExternalTable {
306307 constraints,
307308 column_defaults,
308309 } = fields;
310+ check_fields_unique ( & schema) ?;
309311 Ok ( Self {
310312 name,
311313 schema,
@@ -544,6 +546,7 @@ impl CreateMemoryTable {
544546 column_defaults,
545547 temporary,
546548 } = fields;
549+ check_fields_unique ( input. schema ( ) ) ?;
547550 Ok ( Self {
548551 name,
549552 constraints,
@@ -698,6 +701,7 @@ impl CreateView {
698701 definition,
699702 temporary,
700703 } = fields;
704+ check_fields_unique ( input. schema ( ) ) ?;
701705 Ok ( Self {
702706 name,
703707 input,
@@ -800,6 +804,48 @@ impl CreateViewBuilder {
800804 } )
801805 }
802806}
807+ fn check_fields_unique ( schema : & DFSchema ) -> Result < ( ) > {
808+ // Use tree set for deterministic error messages
809+ let mut qualified_names = BTreeSet :: new ( ) ;
810+ let mut unqualified_names = HashSet :: new ( ) ;
811+ let mut name_occurrences: HashMap < & String , usize > = HashMap :: new ( ) ;
812+
813+ for ( qualifier, field) in schema. iter ( ) {
814+ if let Some ( qualifier) = qualifier {
815+ // Check for duplicate qualified field names
816+ if !qualified_names. insert ( ( qualifier, field. name ( ) ) ) {
817+ return schema_err ! ( SchemaError :: DuplicateQualifiedField {
818+ qualifier: Box :: new( qualifier. clone( ) ) ,
819+ name: field. name( ) . to_string( ) ,
820+ } ) ;
821+ }
822+ // Check for duplicate unqualified field names
823+ } else if !unqualified_names. insert ( field. name ( ) ) {
824+ return schema_err ! ( SchemaError :: DuplicateUnqualifiedField {
825+ name: field. name( ) . to_string( )
826+ } ) ;
827+ }
828+ * name_occurrences. entry ( field. name ( ) ) . or_default ( ) += 1 ;
829+ }
830+
831+ for ( qualifier, name) in qualified_names {
832+ // Check for duplicate between qualified and unqualified field names
833+ if unqualified_names. contains ( name) {
834+ return schema_err ! ( SchemaError :: AmbiguousReference {
835+ field: Column :: new( Some ( qualifier. clone( ) ) , name)
836+ } ) ;
837+ }
838+ // Check for duplicates between qualified names as the qualification will be stripped off
839+ if name_occurrences[ name] > 1 {
840+ return schema_err ! ( SchemaError :: QualifiedFieldWithDuplicateName {
841+ qualifier: Box :: new( qualifier. clone( ) ) ,
842+ name: name. to_owned( ) ,
843+ } ) ;
844+ }
845+ }
846+
847+ Ok ( ( ) )
848+ }
803849
804850/// Creates a catalog (aka "Database").
805851#[ derive( Debug , Clone , PartialEq , Eq , Hash ) ]
@@ -1085,7 +1131,9 @@ impl PartialOrd for CreateIndex {
10851131
10861132#[ cfg( test) ]
10871133mod test {
1134+ use super :: * ;
10881135 use crate :: { CreateCatalog , DdlStatement , DropView } ;
1136+ use arrow:: datatypes:: { DataType , Field , Schema } ;
10891137 use datafusion_common:: { DFSchema , DFSchemaRef , TableReference } ;
10901138 use std:: cmp:: Ordering ;
10911139
@@ -1112,4 +1160,87 @@ mod test {
11121160
11131161 assert_eq ! ( drop_view. partial_cmp( & catalog) , Some ( Ordering :: Greater ) ) ;
11141162 }
1163+
#[test]
fn test_check_fields_unique() -> Result<()> {
    /// Builds a schema of nullable boolean columns with the given names.
    fn bool_schema(names: &[&str]) -> Schema {
        let fields: Vec<Field> = names
            .iter()
            .map(|name| Field::new(*name, DataType::Boolean, true))
            .collect();
        Schema::new(fields)
    }

    /// Renders the error of a failed result with its backtrace removed.
    fn error_message<T: std::fmt::Debug>(outcome: Result<T>) -> String {
        outcome.unwrap_err().strip_backtrace().to_string()
    }

    // Distinct names without any qualifier are accepted.
    let unqualified_ok = DFSchema::try_from(bool_schema(&["c100", "c101"]))?;
    check_fields_unique(&unqualified_ok)?;

    // Distinct names under a single qualifier are accepted.
    let qualified_ok =
        DFSchema::try_from_qualified_schema("t1", &bool_schema(&["c100", "c101"]))?;
    check_fields_unique(&qualified_ok)?;

    // A repeated unqualified name is rejected.
    let repeated_unqualified =
        DFSchema::try_from(bool_schema(&["c0", "c1", "c1", "c2"]))?;
    assert_eq!(
        error_message(check_fields_unique(&repeated_unqualified)),
        "Schema error: Schema contains duplicate unqualified field name c1"
    );

    // The same name twice under the same qualifier. This case expects the
    // `DFSchema` construction itself to fail; if construction ever starts
    // succeeding (due to future changes in DFSchema), call
    // `check_fields_unique` on the resulting schema instead.
    let same_qualifier_twice =
        DFSchema::try_from_qualified_schema("t1", &bool_schema(&["c1", "c1"]));
    assert_eq!(
        error_message(same_qualifier_twice),
        "Schema error: Schema contains duplicate qualified field name t1.c1"
    );

    // One unqualified and one qualified field sharing a name. As above, the
    // failure is expected from schema construction; if construction ever
    // succeeds, call `check_fields_unique` on the resulting schema instead.
    let mixed_qualification = DFSchema::from_field_specific_qualified_schema(
        vec![None, Some(TableReference::from("t1"))],
        &Arc::new(bool_schema(&["c1", "c1"])),
    );
    assert_eq!(
        error_message(mixed_qualification),
        "Schema error: Schema contains qualified field name t1.c1 and unqualified field name c1 which would be ambiguous"
    );

    // Two differently qualified fields sharing a name: the schema can be
    // built, and the clash is reported by `check_fields_unique`.
    let shared_name = DFSchema::from_field_specific_qualified_schema(
        vec![
            Some(TableReference::from("t1")),
            Some(TableReference::from("t2")),
        ],
        &Arc::new(bool_schema(&["c1", "c1"])),
    )?;
    assert_eq!(
        error_message(check_fields_unique(&shared_name)),
        "Schema error: Schema contains qualified fields with duplicate unqualified names t1.c1"
    );

    Ok(())
}
11151246}
0 commit comments