Skip to content

Commit 5aaf700

Browse files
Mike Pigott authored and Pindikura Ravindra committed
ARROW-4142: [Java] JDBC Array -> Arrow ListVector
https://issues.apache.org/jira/browse/ARROW-4142 This adds support for reading JDBC arrays and converting them to Arrow ListVectors. JDBC does not provide a great way to get the array type; there is a ResultSet object for walking the array, but its ResultSetMetaData may not contain the right value type (H2, for example, returns a JDBC type of NULL). This is based on #3134, which includes ARROW-3965 and ARROW-3966. I found Arrow arrays to be very confusing, and I am not sure if I am using them correctly here.  One thing I noticed was if I added a null array to a ListVector of VarCharVectors, the next value in the VarCharVector would be empty.  I would appreciate any help on why!  The ListVector unit tests weren't very helpful. For all other cases, this code seems to work.  I look forward to your review! Author: Mike Pigott <mpigott@gmail.com> Closes #3294 from mikepigott/jdbc-array-field and squashes the following commits: 66376dd <Mike Pigott> Support for reading Array records to ListVector from JDBC
1 parent 24a98e4 commit 5aaf700

9 files changed

Lines changed: 1043 additions & 238 deletions

File tree

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.adapter.jdbc;
19+
20+
import java.sql.ResultSetMetaData;
21+
import java.sql.SQLException;
22+
import java.sql.Types;
23+
24+
import com.google.common.base.Preconditions;
25+
26+
/**
27+
* This class represents the information about a JDBC ResultSet Field that is
28+
* needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}.
29+
* Currently, this is:
30+
* <ul>
31+
* <li>The JDBC {@link java.sql.Types} type.</li>
32+
* <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
33+
* <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
34+
* </ul>
35+
*/
36+
public class JdbcFieldInfo {
37+
private final int jdbcType;
38+
private final int precision;
39+
private final int scale;
40+
41+
/**
42+
* Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor
43+
* if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and
44+
* scale will be set to <code>0</code>.
45+
*
46+
* @param jdbcType The {@link java.sql.Types} type.
47+
* @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}.
48+
*/
49+
public JdbcFieldInfo(int jdbcType) {
50+
Preconditions.checkArgument(
51+
(jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC),
52+
"DECIMAL and NUMERIC types require a precision and scale; please use another constructor.");
53+
54+
this.jdbcType = jdbcType;
55+
this.precision = 0;
56+
this.scale = 0;
57+
}
58+
59+
/**
60+
* Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale.
61+
* Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types.
62+
*
63+
* @param jdbcType The {@link java.sql.Types} type.
64+
* @param precision The field's numeric precision.
65+
* @param scale The field's numeric scale.
66+
*/
67+
public JdbcFieldInfo(int jdbcType, int precision, int scale) {
68+
this.jdbcType = jdbcType;
69+
this.precision = precision;
70+
this.scale = scale;
71+
}
72+
73+
/**
74+
* Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column.
75+
*
76+
* @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from.
77+
* @param column The column to get the field information for (on a 1-based index).
78+
* @throws SQLException If the column information cannot be retrieved.
79+
* @throws NullPointerException if <code>rsmd</code> is <code>null</code>.
80+
* @throws IllegalArgumentException if <code>column</code> is out of bounds.
81+
*/
82+
public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException {
83+
Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null.");
84+
Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1.");
85+
Preconditions.checkArgument(
86+
column <= rsmd.getColumnCount(),
87+
"The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount());
88+
89+
this.jdbcType = rsmd.getColumnType(column);
90+
this.precision = rsmd.getPrecision(column);
91+
this.scale = rsmd.getScale(column);
92+
}
93+
94+
/**
95+
* The {@link java.sql.Types} type.
96+
*/
97+
public int getJdbcType() {
98+
return jdbcType;
99+
}
100+
101+
/**
102+
* The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
103+
*/
104+
public int getPrecision() {
105+
return precision;
106+
}
107+
108+
/**
109+
* The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
110+
*/
111+
public int getScale() {
112+
return scale;
113+
}
114+
}

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
8888
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
8989

9090
JdbcToArrowConfig config =
91-
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
91+
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
9292
return sqlToArrow(connection, query, config);
9393
}
9494

@@ -116,7 +116,7 @@ public static VectorSchemaRoot sqlToArrow(
116116
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
117117
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
118118

119-
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false));
119+
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
120120
}
121121

122122
/**
@@ -170,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
170170
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
171171

172172
JdbcToArrowConfig config =
173-
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
173+
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
174174
return sqlToArrow(resultSet, config);
175175
}
176176

@@ -184,8 +184,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
184184
*/
185185
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
186186
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
187-
188-
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
187+
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
189188
}
190189

191190
/**
@@ -205,7 +204,7 @@ public static VectorSchemaRoot sqlToArrow(
205204
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
206205
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
207206

208-
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false));
207+
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
209208
}
210209

211210
/**

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.arrow.adapter.jdbc;
1919

2020
import java.util.Calendar;
21+
import java.util.Map;
2122

2223
import org.apache.arrow.memory.BaseAllocator;
2324

@@ -28,16 +29,29 @@
2829
* <p>
2930
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
3031
* and the calendar is used to define the time zone of any {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
31-
* fields that are created during the conversion.
32+
* fields that are created during the conversion. The allocator may not be <code>null</code>.
3233
* </p>
3334
* <p>
34-
* Neither field may be <code>null</code>.
35+
* If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
36+
* from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
37+
* {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
38+
* {@link org.apache.arrow.vector.FieldVector}.
39+
* </p>
40+
* <p>
41+
* If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
42+
* {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
43+
* information cannot be retrieved from all JDBC implementations (H2 for example, returns
44+
* {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
45+
* or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
3546
* </p>
3647
*/
3748
public final class JdbcToArrowConfig {
49+
3850
private Calendar calendar;
3951
private BaseAllocator allocator;
4052
private boolean includeMetadata;
53+
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
54+
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
4155

4256
/**
4357
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
@@ -46,20 +60,47 @@ public final class JdbcToArrowConfig {
4660
*
4761
* @param allocator The memory allocator to construct the Arrow vectors with.
4862
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
49-
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
5063
*/
51-
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
64+
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
5265
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
5366

5467
this.allocator = allocator;
5568
this.calendar = calendar;
69+
this.includeMetadata = false;
70+
this.arraySubTypesByColumnIndex = null;
71+
this.arraySubTypesByColumnName = null;
72+
}
73+
74+
/**
75+
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
76+
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
77+
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
78+
*
79+
* @param allocator The memory allocator to construct the Arrow vectors with.
80+
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
81+
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
82+
* @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based).
83+
* @param arraySubTypesByColumnName The type of the JDBC array at the column name.
84+
*/
85+
JdbcToArrowConfig(
86+
BaseAllocator allocator,
87+
Calendar calendar,
88+
boolean includeMetadata,
89+
Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
90+
Map<String, JdbcFieldInfo> arraySubTypesByColumnName) {
91+
92+
this(allocator, calendar);
93+
5694
this.includeMetadata = includeMetadata;
95+
this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
96+
this.arraySubTypesByColumnName = arraySubTypesByColumnName;
5797
}
5898

5999
/**
60100
* The calendar to use when defining Arrow Timestamp fields
61101
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
62102
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
103+
*
63104
* @return the calendar.
64105
*/
65106
public Calendar getCalendar() {
@@ -82,4 +123,32 @@ public BaseAllocator getAllocator() {
82123
public boolean shouldIncludeMetadata() {
83124
return includeMetadata;
84125
}
126+
127+
/**
128+
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
129+
*
130+
* @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
131+
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
132+
*/
133+
public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
134+
if (arraySubTypesByColumnIndex == null) {
135+
return null;
136+
} else {
137+
return arraySubTypesByColumnIndex.get(index);
138+
}
139+
}
140+
141+
/**
142+
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
143+
*
144+
* @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
145+
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
146+
*/
147+
public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
148+
if (arraySubTypesByColumnName == null) {
149+
return null;
150+
} else {
151+
return arraySubTypesByColumnName.get(name);
152+
}
153+
}
85154
}

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.arrow.adapter.jdbc;
1919

2020
import java.util.Calendar;
21+
import java.util.Map;
2122

2223
import org.apache.arrow.memory.BaseAllocator;
2324

@@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder {
3031
private Calendar calendar;
3132
private BaseAllocator allocator;
3233
private boolean includeMetadata;
34+
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
35+
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
3336

3437
/**
3538
* Default constructor for the <code>JdbcToArrowConfigBuilder</code>.
@@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() {
4043
this.allocator = null;
4144
this.calendar = null;
4245
this.includeMetadata = false;
46+
this.arraySubTypesByColumnIndex = null;
47+
this.arraySubTypesByColumnName = null;
4348
}
4449

4550
/**
@@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
126131
return this;
127132
}
128133

134+
/**
135+
* Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
136+
* The column index is 1-based, to match the JDBC column index.
137+
*
138+
* @param map The mapping.
139+
* @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
140+
*/
141+
public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
142+
this.arraySubTypesByColumnIndex = map;
143+
return this;
144+
}
145+
146+
/**
147+
* Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
148+
*
149+
* @param map The mapping.
150+
* @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
151+
*/
152+
public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
153+
this.arraySubTypesByColumnName = map;
154+
return this;
155+
}
156+
129157
/**
130158
* This builds the {@link JdbcToArrowConfig} from the provided
131159
* {@link BaseAllocator} and {@link Calendar}.
@@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
134162
* @throws NullPointerException if the allocator was not set.
135163
*/
136164
public JdbcToArrowConfig build() {
137-
return new JdbcToArrowConfig(allocator, calendar, includeMetadata);
165+
return new JdbcToArrowConfig(
166+
allocator,
167+
calendar,
168+
includeMetadata,
169+
arraySubTypesByColumnIndex,
170+
arraySubTypesByColumnName);
138171
}
139172
}

0 commit comments

Comments
 (0)