Skip to content

Commit 47e40ec

Browse files
xxlaykxx and FiV0 authored
GH-87: [Vector] Add ExtensionWriter (apache#697) (#86)
* GH-87: [Vector] Add ExtensionWriter (apache#697)

  Based on changes from apache#41731. Added the writer ExtensionWriter with 3 methods:
  - write method, for writing values from Extension holders;
  - writeExtension method, for writing values (the argument is an Object because the exact type is not known);
  - addExtensionTypeWriterFactory method: because the exact vector and value types are unknown, the user should create their own extension type vector, a writer for it, and an ExtensionTypeWriterFactory that maps the vector to its writer.

  Closes #87.

  Co-authored-by: Finn Völkel <finn.volkel@gmail.com>
* just checking
* Update Brewfile
* revert
* Fix for Cmake 3.xx installation
* Fix for Cmake 3.xx installation
* update thrift
* update thrift url
* update thrift url
* Update github.yml
* fix format

---------

Co-authored-by: Finn Völkel <finn.volkel@gmail.com>
1 parent b02259e commit 47e40ec

25 files changed

Lines changed: 745 additions & 71 deletions

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,7 @@ if(DEFINED ENV{ARROW_THRIFT_URL})
817817
set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}")
818818
else()
819819
set(THRIFT_SOURCE_URL
820+
"https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download"
820821
"https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
821822
)
822823
endif()

cpp/thirdparty/versions.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ DEPENDENCIES=(
164164
"ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz"
165165
"ARROW_S2N_TLS_URL s2n-${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz https://github.com/aws/s2n-tls/archive/${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz"
166166
"ARROW_SNAPPY_URL snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz"
167-
"ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
167+
"ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download"
168168
"ARROW_UCX_URL ucx-${ARROW_UCX_BUILD_VERSION}.tar.gz https://github.com/openucx/ucx/archive/v${ARROW_UCX_BUILD_VERSION}.tar.gz"
169169
"ARROW_UTF8PROC_URL utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz"
170170
"ARROW_XSIMD_URL xsimd-${ARROW_XSIMD_BUILD_VERSION}.tar.gz https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz"

dev/tasks/java-jars/github.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,18 @@ jobs:
148148
# used on test We uninstall Homebrew's Protobuf to ensure using
149149
# bundled Protobuf.
150150
brew uninstall protobuf
151+
# fix cmake and boost versions
152+
brew uninstall -f boost || true
153+
brew uninstall -f cmake || true
154+
mkdir -p homebrew-custom/Formula
155+
curl -o homebrew-custom/Formula/cmake.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb
156+
curl -o homebrew-custom/Formula/boost.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb
157+
brew install -v ./homebrew-custom/Formula/cmake.rb
158+
brew install -v ./homebrew-custom/Formula/boost.rb
159+
brew pin cmake
160+
brew pin boost
161+
#
162+
151163
152164
brew bundle --file=arrow/java/Brewfile
153165
- name: Build C++ libraries

java/vector/src/main/codegen/templates/AbstractFieldWriter.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ public void endEntry() {
107107
throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
108108
}
109109

110+
public void write(ExtensionHolder var1) {
111+
this.fail("ExtensionType");
112+
}
113+
public void writeExtension(Object var1) {
114+
this.fail("ExtensionType");
115+
}
116+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
117+
this.fail("ExtensionType");
118+
}
119+
110120
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
111121
<#assign fields = minor.fields!type.fields />
112122
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
@@ -241,6 +251,18 @@ public MapWriter map(String name, boolean keysSorted) {
241251
fail("Map");
242252
return null;
243253
}
254+
255+
@Override
256+
public ExtensionWriter extension(String name, ArrowType arrowType) {
257+
fail("Extension");
258+
return null;
259+
}
260+
261+
@Override
262+
public ExtensionWriter extension(ArrowType arrowType) {
263+
fail("Extension");
264+
return null;
265+
}
244266
<#list vv.types as type><#list type.minor as minor>
245267
<#assign lowerName = minor.class?uncap_first />
246268
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>

java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,11 @@ public MapWriter map(boolean keysSorted) {
293293
return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
294294
}
295295
296+
@Override
297+
public ExtensionWriter extension(ArrowType arrowType) {
298+
return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType);
299+
}
300+
296301
@Override
297302
public StructWriter struct(String name) {
298303
return getWriter(MinorType.STRUCT).struct(name);
@@ -318,6 +323,11 @@ public MapWriter map(String name, boolean keysSorted) {
318323
return getWriter(MinorType.STRUCT).map(name, keysSorted);
319324
}
320325
326+
@Override
327+
public ExtensionWriter extension(String name, ArrowType arrowType) {
328+
return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType);
329+
}
330+
321331
<#list vv.types as type><#list type.minor as minor>
322332
<#assign lowerName = minor.class?uncap_first />
323333
<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>

java/vector/src/main/codegen/templates/BaseWriter.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public interface StructWriter extends BaseWriter {
6161
6262
void copyReaderToField(String name, FieldReader reader);
6363
StructWriter struct(String name);
64+
ExtensionWriter extension(String name, ArrowType arrowType);
6465
ListWriter list(String name);
6566
ListWriter listView(String name);
6667
MapWriter map(String name);
@@ -79,6 +80,7 @@ public interface ListWriter extends BaseWriter {
7980
ListWriter listView();
8081
MapWriter map();
8182
MapWriter map(boolean keysSorted);
83+
ExtensionWriter extension(ArrowType arrowType);
8284
void copyReader(FieldReader reader);
8385
8486
<#list vv.types as type><#list type.minor as minor>
@@ -101,6 +103,35 @@ public interface MapWriter extends ListWriter {
101103
MapWriter value();
102104
}
103105

106+
public interface ExtensionWriter extends BaseWriter {
107+
108+
/**
109+
* Writes a null value.
110+
*/
111+
void writeNull();
112+
113+
/**
114+
* Writes value from the given extension holder.
115+
*
116+
* @param holder the extension holder to write
117+
*/
118+
void write(ExtensionHolder holder);
119+
120+
/**
121+
* Writes the given extension type value.
122+
*
123+
* @param value the extension type value to write
124+
*/
125+
void writeExtension(Object value);
126+
127+
/**
128+
* Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector.
129+
*
130+
* @param factory the extension type factory to add
131+
*/
132+
void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory);
133+
}
134+
104135
public interface ScalarWriter extends
105136
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, </#list></#list> BaseWriter {}
106137

java/vector/src/main/codegen/templates/PromotableWriter.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,9 @@ protected void setWriter(ValueVector v) {
285285
case UNION:
286286
writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
287287
break;
288+
case EXTENSIONTYPE:
289+
writer = new UnionExtensionWriter((ExtensionTypeVector) vector);
290+
break;
288291
default:
289292
writer = type.getNewFieldWriter(vector);
290293
break;
@@ -316,6 +319,7 @@ protected boolean requiresArrowType(MinorType type) {
316319
|| type == MinorType.MAP
317320
|| type == MinorType.DURATION
318321
|| type == MinorType.FIXEDSIZEBINARY
322+
|| type == MinorType.EXTENSIONTYPE
319323
|| (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ"));
320324
}
321325

@@ -536,6 +540,16 @@ public void writeLargeVarChar(String value) {
536540
getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value);
537541
}
538542

543+
@Override
544+
public void writeExtension(Object value) {
545+
getWriter(MinorType.EXTENSIONTYPE).writeExtension(value);
546+
}
547+
548+
@Override
549+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
550+
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
551+
}
552+
539553
@Override
540554
public void allocate() {
541555
getWriter().allocate();

java/vector/src/main/codegen/templates/StructWriters.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ public class ${mode}StructWriter extends AbstractFieldWriter {
8383
fields.put(handleCase(child.getName()), writer);
8484
break;
8585
}
86+
case EXTENSIONTYPE:
87+
extension(child.getName(), child.getType());
88+
break;
8689
case UNION:
8790
FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);
8891
UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
@@ -159,6 +162,29 @@ public StructWriter struct(String name) {
159162
return writer;
160163
}
161164

165+
@Override
166+
public ExtensionWriter extension(String name, ArrowType arrowType) {
167+
String finalName = handleCase(name);
168+
FieldWriter writer = fields.get(finalName);
169+
if (writer == null) {
170+
int vectorCount=container.size();
171+
FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null);
172+
ExtensionTypeVector vector = container.addOrGet(name, fieldType, ExtensionTypeVector.class);
173+
writer = new PromotableWriter(vector, container, getNullableStructWriterFactory());
174+
if (vectorCount != container.size()) {
175+
writer.allocate();
176+
}
177+
writer.setPosition(idx());
178+
fields.put(finalName, writer);
179+
} else {
180+
if (writer instanceof PromotableWriter) {
181+
// ensure writers are initialized
182+
((PromotableWriter)writer).getWriter(MinorType.EXTENSIONTYPE, arrowType);
183+
}
184+
}
185+
return (ExtensionWriter) writer;
186+
}
187+
162188
@Override
163189
public void close() throws Exception {
164190
clear();

java/vector/src/main/codegen/templates/UnionListWriter.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,18 @@ public MapWriter map(String name, boolean keysSorted) {
201201
return mapWriter;
202202
}
203203
204+
@Override
205+
public ExtensionWriter extension(ArrowType arrowType) {
206+
writer.extension(arrowType);
207+
return writer;
208+
}
209+
210+
@Override
211+
public ExtensionWriter extension(String name, ArrowType arrowType) {
212+
ExtensionWriter extensionWriter = writer.extension(name, arrowType);
213+
return extensionWriter;
214+
}
215+
204216
<#if listName == "LargeList">
205217
@Override
206218
public void startList() {
@@ -323,6 +335,20 @@ public void writeNull() {
323335
}
324336
}
325337
338+
@Override
339+
public void writeExtension(Object value) {
340+
writer.writeExtension(value);
341+
}
342+
343+
@Override
344+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
345+
writer.addExtensionTypeWriterFactory(var1);
346+
}
347+
348+
public void write(ExtensionHolder var1) {
349+
writer.write(var1);
350+
}
351+
326352
<#list vv.types as type>
327353
<#list type.minor as minor>
328354
<#assign name = minor.class?cap_first />

java/vector/src/main/codegen/templates/UnionMapWriter.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,16 @@ public MapWriter map() {
231231
return super.map();
232232
}
233233
}
234+
235+
@Override
236+
public ExtensionWriter extension(ArrowType type) {
237+
switch (mode) {
238+
case KEY:
239+
return entryWriter.extension(MapVector.KEY_NAME, type);
240+
case VALUE:
241+
return entryWriter.extension(MapVector.VALUE_NAME, type);
242+
default:
243+
return super.extension(type);
244+
}
245+
}
234246
}

0 commit comments

Comments
 (0)