Skip to content

Commit 8e5bad7

Browse files
authored
fix bigtable schema caching (#23)
Signed-off-by: Oleksii Moskalenko <moskalenko.alexey@gmail.com>
1 parent 8f41e36 commit 8e5bad7

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
import java.util.stream.Collectors;
3636
import java.util.stream.StreamSupport;
3737
import org.apache.avro.AvroRuntimeException;
38-
import org.apache.avro.Schema;
3938
import org.apache.avro.generic.GenericDatumReader;
4039
import org.apache.avro.generic.GenericRecord;
4140
import org.apache.avro.io.*;
@@ -138,6 +137,7 @@ private List<Feature> decodeFeatures(
138137
String tableName,
139138
ByteString value,
140139
List<FeatureReferenceV2> featureReferences,
140+
BinaryDecoder reusedDecoder,
141141
long timestamp)
142142
throws IOException {
143143
ByteString schemaReferenceBytes = value.substring(0, 4);
@@ -146,11 +146,10 @@ private List<Feature> decodeFeatures(
146146
BigTableSchemaRegistry.SchemaReference schemaReference =
147147
new BigTableSchemaRegistry.SchemaReference(tableName, schemaReferenceBytes);
148148

149-
Schema schema = schemaRegistry.getSchema(schemaReference);
149+
GenericDatumReader<GenericRecord> reader = schemaRegistry.getReader(schemaReference);
150150

151-
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
152-
Decoder decoder = DecoderFactory.get().binaryDecoder(featureValueBytes, null);
153-
GenericRecord record = reader.read(null, decoder);
151+
reusedDecoder = DecoderFactory.get().binaryDecoder(featureValueBytes, reusedDecoder);
152+
GenericRecord record = reader.read(null, reusedDecoder);
154153

155154
return featureReferences.stream()
156155
.map(
@@ -237,6 +236,8 @@ private List<List<Feature>> convertRowToFeature(
237236
Map<ByteString, Row> rows,
238237
List<FeatureReferenceV2> featureReferences) {
239238

239+
BinaryDecoder reusedDecoder = DecoderFactory.get().binaryDecoder(new byte[0], null);
240+
240241
return rowKeys.stream()
241242
.map(
242243
rowKey -> {
@@ -269,6 +270,7 @@ private List<List<Feature>> convertRowToFeature(
269270
tableName,
270271
value,
271272
localFeatureReferences,
273+
reusedDecoder,
272274
rowCell.getTimestamp());
273275
} catch (IOException e) {
274276
throw new RuntimeException("Failed to decode features from BigTable");

storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableSchemaRegistry.java

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,12 @@
2727
import com.google.protobuf.ByteString;
2828
import java.util.concurrent.ExecutionException;
2929
import org.apache.avro.Schema;
30+
import org.apache.avro.generic.GenericDatumReader;
31+
import org.apache.avro.generic.GenericRecord;
3032

3133
public class BigTableSchemaRegistry {
3234
private final BigtableDataClient client;
33-
private final LoadingCache<SchemaReference, Schema> cache;
35+
private final LoadingCache<SchemaReference, GenericDatumReader<GenericRecord>> cache;
3436

3537
private static String COLUMN_FAMILY = "metadata";
3638
private static String QUALIFIER = "avro";
@@ -52,34 +54,54 @@ public String getTableName() {
5254
public ByteString getSchemaHash() {
5355
return schemaHash;
5456
}
57+
58+
@Override
59+
public int hashCode() {
60+
int result = tableName.hashCode();
61+
result = 31 * result + schemaHash.hashCode();
62+
return result;
63+
}
64+
65+
@Override
66+
public boolean equals(Object o) {
67+
if (this == o) return true;
68+
if (o == null || getClass() != o.getClass()) return false;
69+
70+
SchemaReference that = (SchemaReference) o;
71+
72+
if (!tableName.equals(that.tableName)) return false;
73+
return schemaHash.equals(that.schemaHash);
74+
}
5575
}
5676

5777
public BigTableSchemaRegistry(BigtableDataClient client) {
5878
this.client = client;
5979

60-
CacheLoader<SchemaReference, Schema> schemaCacheLoader = CacheLoader.from(this::loadSchema);
80+
CacheLoader<SchemaReference, GenericDatumReader<GenericRecord>> schemaCacheLoader =
81+
CacheLoader.from(this::loadReader);
6182

6283
cache = CacheBuilder.newBuilder().build(schemaCacheLoader);
6384
}
6485

65-
public Schema getSchema(SchemaReference reference) {
66-
Schema schema;
86+
public GenericDatumReader<GenericRecord> getReader(SchemaReference reference) {
87+
GenericDatumReader<GenericRecord> reader;
6788
try {
68-
schema = this.cache.get(reference);
89+
reader = this.cache.get(reference);
6990
} catch (ExecutionException | CacheLoader.InvalidCacheLoadException e) {
7091
throw new RuntimeException(String.format("Unable to find Schema"), e);
7192
}
72-
return schema;
93+
return reader;
7394
}
7495

75-
private Schema loadSchema(SchemaReference reference) {
96+
private GenericDatumReader<GenericRecord> loadReader(SchemaReference reference) {
7697
Row row =
7798
client.readRow(
7899
reference.getTableName(),
79100
ByteString.copyFrom(KEY_PREFIX.getBytes()).concat(reference.getSchemaHash()),
80101
Filters.FILTERS.family().exactMatch(COLUMN_FAMILY));
81102
RowCell last = Iterables.getLast(row.getCells(COLUMN_FAMILY, QUALIFIER));
82103

83-
return new Schema.Parser().parse(last.getValue().toStringUtf8());
104+
Schema schema = new Schema.Parser().parse(last.getValue().toStringUtf8());
105+
return new GenericDatumReader<>(schema);
84106
}
85107
}

0 commit comments

Comments
 (0)