Skip to content

Commit da0e218

Browse files
tianchen authored and emkornfield committed
ARROW-7259: [Java] Support subfield encoder use different hasher
Related to [ARROW-7259](https://issues.apache.org/jira/browse/ARROW-7259).

Currently, ListSubfieldEncoder and StructSubfieldEncoder always use the default hasher when calculating hash codes. This change lets them use a different hasher, including a user-defined one, for their own use cases, just as DictionaryEncoder does.

Closes apache#5899 from tianchen92/ARROW-7259 and squashes the following commits:

7140a4a <tianchen> ARROW-7259: Support subfield encoder use different hasher

Authored-by: tianchen <niki.lj@alibaba-inc.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
1 parent 6a40bc6 commit da0e218

2 files changed

Lines changed: 20 additions & 4 deletions

File tree

java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@
2020
import java.util.Collections;
2121

2222
import org.apache.arrow.memory.BufferAllocator;
23+
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
24+
import org.apache.arrow.memory.util.hash.SimpleHasher;
2325
import org.apache.arrow.vector.BaseIntVector;
2426
import org.apache.arrow.vector.FieldVector;
2527
import org.apache.arrow.vector.ValueVector;
@@ -38,14 +40,18 @@ public class ListSubfieldEncoder {
3840
private final Dictionary dictionary;
3941
private final BufferAllocator allocator;
4042

43+
public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) {
44+
this (dictionary, allocator, SimpleHasher.INSTANCE);
45+
}
46+
4147
/**
4248
* Construct an instance.
4349
*/
44-
public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) {
50+
public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
4551
this.dictionary = dictionary;
4652
this.allocator = allocator;
4753
BaseListVector dictVector = (BaseListVector) dictionary.getVector();
48-
hashTable = new DictionaryHashTable(getDataVector(dictVector));
54+
hashTable = new DictionaryHashTable(getDataVector(dictVector), hasher);
4955
}
5056

5157
private FieldVector getDataVector(BaseListVector vector) {

java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
import java.util.Map;
2424

2525
import org.apache.arrow.memory.BufferAllocator;
26+
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
27+
import org.apache.arrow.memory.util.hash.SimpleHasher;
2628
import org.apache.arrow.util.Preconditions;
2729
import org.apache.arrow.vector.BaseIntVector;
2830
import org.apache.arrow.vector.FieldVector;
@@ -44,20 +46,28 @@ public class StructSubfieldEncoder {
4446
private final DictionaryProvider.MapDictionaryProvider provider;
4547
private final Map<Long, DictionaryHashTable> dictionaryIdToHashTable;
4648

49+
/**
50+
* Construct an instance.
51+
*/
52+
public StructSubfieldEncoder(BufferAllocator allocator, DictionaryProvider.MapDictionaryProvider provider) {
53+
this (allocator, provider, SimpleHasher.INSTANCE);
54+
}
55+
4756
/**
4857
* Construct an instance.
4958
*/
5059
public StructSubfieldEncoder(
5160
BufferAllocator allocator,
52-
DictionaryProvider.MapDictionaryProvider provider) {
61+
DictionaryProvider.MapDictionaryProvider provider,
62+
ArrowBufHasher hasher) {
5363

5464
this.allocator = allocator;
5565
this.provider = provider;
5666

5767
this.dictionaryIdToHashTable = new HashMap<>();
5868

5969
provider.getDictionaryIds().forEach(id ->
60-
dictionaryIdToHashTable.put(id, new DictionaryHashTable(provider.lookup(id).getVector())));
70+
dictionaryIdToHashTable.put(id, new DictionaryHashTable(provider.lookup(id).getVector(), hasher)));
6171
}
6272

6373
private FieldVector getChildVector(StructVector vector, int index) {

0 commit comments

Comments
 (0)