Skip to content

Commit d100fdf

Browse files
authored
Fix SSTable name length restrictions during retrieval (feast-dev#31)
* Add name length restrictions for retrieval Signed-off-by: Terence Lim <terencelimxp@gmail.com> * Add hash suffix logic Signed-off-by: Terence Lim <terencelimxp@gmail.com> * Add superlong hash suffix IT Signed-off-by: Terence Lim <terencelimxp@gmail.com> * Address comments Signed-off-by: Terence Lim <terencelimxp@gmail.com> * Update IT Signed-off-by: Terence Lim <terencelimxp@gmail.com>
1 parent baf15da commit d100fdf

File tree

3 files changed

+168
-2
lines changed

3 files changed

+168
-2
lines changed

serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,21 @@ static void globalSetup() throws IOException {
158158
.build();
159159
TestUtils.applyEntity(coreClient, projectName, driverEntitySpec);
160160

161+
// Apply Entity (this_is_a_long_long_long_long_long_long_entity_id)
162+
String superLongEntityName = "this_is_a_long_long_long_long_long_long_entity_id";
163+
String superLongEntityDescription = "My super long entity id";
164+
ValueProto.ValueType.Enum superLongEntityType = ValueProto.ValueType.Enum.INT64;
165+
EntityProto.EntitySpecV2 superLongEntitySpec =
166+
EntityProto.EntitySpecV2.newBuilder()
167+
.setName(superLongEntityName)
168+
.setDescription(superLongEntityDescription)
169+
.setValueType(superLongEntityType)
170+
.build();
171+
TestUtils.applyEntity(coreClient, projectName, superLongEntitySpec);
172+
161173
// Apply Entity (merchant_id)
162174
String merchantEntityName = "merchant_id";
163-
String merchantEntityDescription = "My driver id";
175+
String merchantEntityDescription = "My merchant id";
164176
ValueProto.ValueType.Enum merchantEntityType = ValueProto.ValueType.Enum.INT64;
165177
EntityProto.EntitySpecV2 merchantEntitySpec =
166178
EntityProto.EntitySpecV2.newBuilder()
@@ -186,6 +198,27 @@ static void globalSetup() throws IOException {
186198
TestUtils.applyFeatureTable(
187199
coreClient, projectName, ridesFeatureTableName, ridesEntities, ridesFeatures, 7200);
188200

201+
// Apply FeatureTable (superLong)
202+
String superLongFeatureTableName = "superlong";
203+
ImmutableList<String> superLongEntities = ImmutableList.of(superLongEntityName);
204+
ImmutableMap<String, ValueProto.ValueType.Enum> superLongFeatures =
205+
ImmutableMap.of(
206+
"trip_cost",
207+
ValueProto.ValueType.Enum.INT64,
208+
"trip_distance",
209+
ValueProto.ValueType.Enum.DOUBLE,
210+
"trip_empty",
211+
ValueProto.ValueType.Enum.DOUBLE,
212+
"trip_wrong_type",
213+
ValueProto.ValueType.Enum.STRING);
214+
TestUtils.applyFeatureTable(
215+
coreClient,
216+
projectName,
217+
superLongFeatureTableName,
218+
superLongEntities,
219+
superLongFeatures,
220+
7200);
221+
189222
// Apply FeatureTable (rides_merchant)
190223
String rideMerchantFeatureTableName = "rides_merchant";
191224
ImmutableList<String> ridesMerchantEntities =
@@ -199,6 +232,13 @@ static void globalSetup() throws IOException {
199232
7200);
200233

201234
// BigTable Table names
235+
String superLongBtTableName = String.format("%s__%s", projectName, superLongEntityName);
236+
String hashSuffix =
237+
Hashing.murmur3_32().hashBytes(superLongBtTableName.substring(42).getBytes()).toString();
238+
superLongBtTableName =
239+
superLongBtTableName
240+
.substring(0, Math.min(superLongBtTableName.length(), 42))
241+
.concat(hashSuffix);
202242
String btTableName = String.format("%s__%s", projectName, driverEntityName);
203243
String compoundBtTableName =
204244
String.format(
@@ -237,6 +277,39 @@ static void globalSetup() throws IOException {
237277
ingestData(
238278
featureTableName, btTableName, entityFeatureKey, entityFeatureValue, schemaKey, ftSchema);
239279

280+
/** SuperLong Entity Ingestion Workflow */
281+
Schema superLongFtSchema =
282+
SchemaBuilder.record("SuperLongData")
283+
.namespace(superLongFeatureTableName)
284+
.fields()
285+
.requiredLong(feature1Reference.getName())
286+
.requiredDouble(feature2Reference.getName())
287+
.nullableString(feature3Reference.getName(), "null")
288+
.requiredString(feature4Reference.getName())
289+
.endRecord();
290+
byte[] superLongSchemaReference =
291+
Hashing.murmur3_32().hashBytes(superLongFtSchema.toString().getBytes()).asBytes();
292+
293+
GenericRecord superLongRecord =
294+
new GenericRecordBuilder(superLongFtSchema)
295+
.set("trip_cost", 5L)
296+
.set("trip_distance", 3.5)
297+
.set("trip_empty", null)
298+
.set("trip_wrong_type", "test")
299+
.build();
300+
byte[] superLongEntityFeatureKey =
301+
String.valueOf(DataGenerator.createInt64Value(1).getInt64Val()).getBytes();
302+
byte[] superLongEntityFeatureValue =
303+
createEntityValue(superLongFtSchema, superLongSchemaReference, superLongRecord);
304+
byte[] superLongSchemaKey = createSchemaKey(superLongSchemaReference);
305+
ingestData(
306+
superLongFeatureTableName,
307+
superLongBtTableName,
308+
superLongEntityFeatureKey,
309+
superLongEntityFeatureValue,
310+
superLongSchemaKey,
311+
superLongFtSchema);
312+
240313
/** Compound Entity Ingestion Workflow */
241314
Schema compoundFtSchema =
242315
SchemaBuilder.record("DriverMerchantData")
@@ -726,6 +799,62 @@ public void shouldSupportAllFeastTypes() throws IOException {
726799
.allMatch(status -> status.equals(GetOnlineFeaturesResponse.FieldStatus.PRESENT));
727800
}
728801

802+
@Test
803+
public void shouldRegisterSuperLongEntityAndGetOnlineFeatures() {
804+
// getOnlineFeatures Information
805+
String projectName = "default";
806+
String entityName = "this_is_a_long_long_long_long_long_long_entity_id";
807+
ValueProto.Value entityValue = ValueProto.Value.newBuilder().setInt64Val(1).build();
808+
809+
// Instantiate EntityRows
810+
GetOnlineFeaturesRequestV2.EntityRow entityRow1 =
811+
DataGenerator.createEntityRow(entityName, DataGenerator.createInt64Value(1), 100);
812+
ImmutableList<GetOnlineFeaturesRequestV2.EntityRow> entityRows = ImmutableList.of(entityRow1);
813+
814+
// Instantiate FeatureReferences
815+
FeatureReferenceV2 featureReference =
816+
DataGenerator.createFeatureReference("superlong", "trip_cost");
817+
FeatureReferenceV2 notFoundFeatureReference =
818+
DataGenerator.createFeatureReference("superlong", "trip_transaction");
819+
820+
ImmutableList<FeatureReferenceV2> featureReferences =
821+
ImmutableList.of(featureReference, notFoundFeatureReference);
822+
823+
// Build GetOnlineFeaturesRequestV2
824+
GetOnlineFeaturesRequestV2 onlineFeatureRequest =
825+
TestUtils.createOnlineFeatureRequest(projectName, featureReferences, entityRows);
826+
GetOnlineFeaturesResponse featureResponse =
827+
servingStub.getOnlineFeaturesV2(onlineFeatureRequest);
828+
829+
ImmutableMap<String, ValueProto.Value> expectedValueMap =
830+
ImmutableMap.of(
831+
entityName,
832+
entityValue,
833+
FeatureV2.getFeatureStringRef(featureReference),
834+
DataGenerator.createInt64Value(5),
835+
FeatureV2.getFeatureStringRef(notFoundFeatureReference),
836+
DataGenerator.createEmptyValue());
837+
838+
ImmutableMap<String, GetOnlineFeaturesResponse.FieldStatus> expectedStatusMap =
839+
ImmutableMap.of(
840+
entityName,
841+
GetOnlineFeaturesResponse.FieldStatus.PRESENT,
842+
FeatureV2.getFeatureStringRef(featureReference),
843+
GetOnlineFeaturesResponse.FieldStatus.PRESENT,
844+
FeatureV2.getFeatureStringRef(notFoundFeatureReference),
845+
GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND);
846+
847+
GetOnlineFeaturesResponse.FieldValues expectedFieldValues =
848+
GetOnlineFeaturesResponse.FieldValues.newBuilder()
849+
.putAllFields(expectedValueMap)
850+
.putAllStatuses(expectedStatusMap)
851+
.build();
852+
ImmutableList<GetOnlineFeaturesResponse.FieldValues> expectedFieldValuesList =
853+
ImmutableList.of(expectedFieldValues);
854+
855+
assertEquals(expectedFieldValuesList, featureResponse.getFieldValuesList());
856+
}
857+
729858
@TestConfiguration
730859
public static class TestConfig {
731860
@Bean

storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public class CassandraOnlineRetriever implements SSTableOnlineRetriever<ByteBuff
5151
private static final String ENTITY_KEY = "key";
5252
private static final String SCHEMA_REF_SUFFIX = "__schema_ref";
5353
private static final String EVENT_TIMESTAMP_SUFFIX = "__event_timestamp";
54+
private static final int MAX_TABLE_NAME_LENGTH = 48;
5455

5556
public CassandraOnlineRetriever(CqlSession session) {
5657
this.session = session;
@@ -74,6 +75,19 @@ public ByteBuffer convertEntityValueToKey(EntityRow entityRow, List<String> enti
7475
.getBytes());
7576
}
7677

78+
/**
79+
* Generate Cassandra table name, with limit of 48 characters.
80+
*
81+
* @param project Name of Feast project
82+
* @param entityNames List of entities used in retrieval call
83+
* @return Cassandra table name for retrieval
84+
*/
85+
@Override
86+
public String getSSTable(String project, List<String> entityNames) {
87+
String tableName = String.format("%s__%s", project, String.join("__", entityNames));
88+
return trimAndHash(tableName, MAX_TABLE_NAME_LENGTH);
89+
}
90+
7791
/**
7892
* Converts Cassandra rows into @NativeFeature type.
7993
*

storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package feast.storage.connectors.sstable.retriever;
1818

19+
import com.google.common.hash.Hashing;
1920
import feast.proto.serving.ServingAPIProto.FeatureReferenceV2;
2021
import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow;
2122
import feast.proto.types.ValueProto;
@@ -31,6 +32,8 @@
3132
*/
3233
public interface SSTableOnlineRetriever<K, V> extends OnlineRetrieverV2 {
3334

35+
int MAX_TABLE_NAME_LENGTH = 50;
36+
3437
@Override
3538
default List<List<Feature>> getOnlineFeatures(
3639
String project,
@@ -93,7 +96,8 @@ List<List<Feature>> convertRowToFeature(
9396
* @return Name of SSTable table
9497
*/
9598
default String getSSTable(String project, List<String> entityNames) {
96-
return String.format("%s__%s", project, String.join("__", entityNames));
99+
return trimAndHash(
100+
String.format("%s__%s", project, String.join("__", entityNames)), MAX_TABLE_NAME_LENGTH);
97101
}
98102

99103
/**
@@ -137,4 +141,23 @@ default List<String> getSSTableColumns(List<FeatureReferenceV2> featureReference
137141
.distinct()
138142
.collect(Collectors.toList());
139143
}
144+
145+
/**
146+
* Trims long SSTable table names and appends hash suffix for uniqueness.
147+
*
148+
* @param expr Original SSTable table name
149+
* @param maxLength Maximum length allowed for SSTable
150+
* @return Hashed suffix SSTable table name
151+
*/
152+
default String trimAndHash(String expr, int maxLength) {
153+
// Length 8 as derived from murmurhash_32 implementation
154+
int maxPrefixLength = maxLength - 8;
155+
String finalName = expr;
156+
if (expr.length() > maxLength) {
157+
String hashSuffix =
158+
Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString();
159+
finalName = expr.substring(0, Math.min(expr.length(), maxPrefixLength)).concat(hashSuffix);
160+
}
161+
return finalName;
162+
}
140163
}

0 commit comments

Comments
 (0)