Skip to content

Commit 3e40a61

Browse files
author
zhilingc
committed
Apply PR #392
1 parent a340613 commit 3e40a61

File tree

4 files changed

+84
-18
lines changed

4 files changed

+84
-18
lines changed

serving/src/main/java/feast/serving/service/BigQueryServingService.java

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.cloud.bigquery.Schema;
3232
import com.google.cloud.bigquery.Table;
3333
import com.google.cloud.bigquery.TableId;
34+
import com.google.cloud.bigquery.TableInfo;
3435
import com.google.cloud.storage.Storage;
3536
import feast.serving.ServingAPIProto;
3637
import feast.serving.ServingAPIProto.DataFormat;
@@ -56,10 +57,12 @@
5657
import java.util.Optional;
5758
import java.util.UUID;
5859
import java.util.stream.Collectors;
60+
import org.joda.time.Duration;
5961
import org.slf4j.Logger;
6062

6163
public class BigQueryServingService implements ServingService {
6264

65+
public static final long TEMP_TABLE_EXPIRY_DURATION_MS = Duration.standardDays(1).getMillis();
6366
private static final Logger log = org.slf4j.LoggerFactory.getLogger(BigQueryServingService.class);
6467

6568
private final BigQuery bigquery;
@@ -179,22 +182,33 @@ private Table loadEntities(DatasetSource datasetSource) {
179182
switch (datasetSource.getDatasetSourceCase()) {
180183
case FILE_SOURCE:
181184
try {
182-
String tableName = generateTemporaryTableName();
183-
log.info("Loading entity dataset to table {}.{}.{}", projectId, datasetId, tableName);
184-
TableId tableId = TableId.of(projectId, datasetId, tableName);
185-
// Currently only avro supported
185+
// Currently only AVRO format is supported
186+
186187
if (datasetSource.getFileSource().getDataFormat() != DataFormat.DATA_FORMAT_AVRO) {
187188
throw Status.INVALID_ARGUMENT
188-
.withDescription("Invalid file format, only avro supported")
189+
.withDescription("Invalid file format, only AVRO is supported.")
189190
.asRuntimeException();
190191
}
192+
193+
TableId tableId = TableId.of(projectId, datasetId, createTempTableName());
194+
log.info("Loading entity rows to: {}.{}.{}", projectId, datasetId, tableId.getTable());
195+
191196
LoadJobConfiguration loadJobConfiguration =
192197
LoadJobConfiguration.of(
193198
tableId, datasetSource.getFileSource().getFileUrisList(), FormatOptions.avro());
194199
loadJobConfiguration =
195200
loadJobConfiguration.toBuilder().setUseAvroLogicalTypes(true).build();
196201
Job job = bigquery.create(JobInfo.of(loadJobConfiguration));
197202
job.waitFor();
203+
204+
TableInfo expiry =
205+
bigquery
206+
.getTable(tableId)
207+
.toBuilder()
208+
.setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS)
209+
.build();
210+
bigquery.update(expiry);
211+
198212
loadedEntityTable = bigquery.getTable(tableId);
199213
if (!loadedEntityTable.exists()) {
200214
throw new RuntimeException(
@@ -204,7 +218,7 @@ private Table loadEntities(DatasetSource datasetSource) {
204218
} catch (Exception e) {
205219
log.error("Exception has occurred in loadEntities method: ", e);
206220
throw Status.INTERNAL
207-
.withDescription("Failed to load entity dataset into store")
221+
.withDescription("Failed to load entity dataset into store: " + e.toString())
208222
.withCause(e)
209223
.asRuntimeException();
210224
}
@@ -216,20 +230,23 @@ private Table loadEntities(DatasetSource datasetSource) {
216230
}
217231
}
218232

219-
private String generateTemporaryTableName() {
220-
String source = String.format("feastserving%d", System.currentTimeMillis());
221-
String guid = UUID.nameUUIDFromBytes(source.getBytes()).toString();
222-
String suffix = guid.substring(0, Math.min(guid.length(), 10)).replaceAll("-", "");
223-
return String.format("temp_%s", suffix);
224-
}
225-
226233
private TableId generateUUIDs(Table loadedEntityTable) {
227234
try {
228235
String uuidQuery =
229236
createEntityTableUUIDQuery(generateFullTableName(loadedEntityTable.getTableId()));
230-
QueryJobConfiguration queryJobConfig = QueryJobConfiguration.newBuilder(uuidQuery).build();
237+
QueryJobConfiguration queryJobConfig =
238+
QueryJobConfiguration.newBuilder(uuidQuery)
239+
.setDestinationTable(TableId.of(projectId, datasetId, createTempTableName()))
240+
.build();
231241
Job queryJob = bigquery.create(JobInfo.of(queryJobConfig));
232242
queryJob.waitFor();
243+
TableInfo expiry =
244+
bigquery
245+
.getTable(queryJobConfig.getDestinationTable())
246+
.toBuilder()
247+
.setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS)
248+
.build();
249+
bigquery.update(expiry);
233250
queryJobConfig = queryJob.getConfiguration();
234251
return queryJobConfig.getDestinationTable();
235252
} catch (InterruptedException | BigQueryException e) {
@@ -239,4 +256,8 @@ private TableId generateUUIDs(Table loadedEntityTable) {
239256
.asRuntimeException();
240257
}
241258
}
259+
260+
public static String createTempTableName() {
261+
return "_" + UUID.randomUUID().toString().replace("-", "");
262+
}
242263
}

serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
*/
1717
package feast.serving.store.bigquery;
1818

19+
import static feast.serving.service.BigQueryServingService.TEMP_TABLE_EXPIRY_DURATION_MS;
20+
import static feast.serving.service.BigQueryServingService.createTempTableName;
1921
import static feast.serving.store.bigquery.QueryTemplater.createTimestampLimitQuery;
2022

2123
import com.google.auto.value.AutoValue;
@@ -27,6 +29,8 @@
2729
import com.google.cloud.bigquery.Job;
2830
import com.google.cloud.bigquery.JobInfo;
2931
import com.google.cloud.bigquery.QueryJobConfiguration;
32+
import com.google.cloud.bigquery.TableId;
33+
import com.google.cloud.bigquery.TableInfo;
3034
import com.google.cloud.bigquery.TableResult;
3135
import com.google.cloud.storage.Blob;
3236
import com.google.cloud.storage.Storage;
@@ -179,10 +183,13 @@ Job runBatchQuery(List<String> featureSetQueries)
179183

180184
for (int i = 0; i < featureSetQueries.size(); i++) {
181185
QueryJobConfiguration queryJobConfig =
182-
QueryJobConfiguration.newBuilder(featureSetQueries.get(i)).build();
186+
QueryJobConfiguration.newBuilder(featureSetQueries.get(i))
187+
.setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName()))
188+
.build();
183189
Job subqueryJob = bigquery().create(JobInfo.of(queryJobConfig));
184190
executorCompletionService.submit(
185191
SubqueryCallable.builder()
192+
.setBigquery(bigquery())
186193
.setFeatureSetInfo(featureSetInfos().get(i))
187194
.setSubqueryJob(subqueryJob)
188195
.build());
@@ -214,10 +221,21 @@ Job runBatchQuery(List<String> featureSetQueries)
214221
String joinQuery =
215222
QueryTemplater.createJoinQuery(
216223
featureSetInfos, entityTableColumnNames(), entityTableName());
217-
QueryJobConfiguration queryJobConfig = QueryJobConfiguration.newBuilder(joinQuery).build();
224+
QueryJobConfiguration queryJobConfig =
225+
QueryJobConfiguration.newBuilder(joinQuery)
226+
.setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName()))
227+
.build();
218228
queryJob = bigquery().create(JobInfo.of(queryJobConfig));
219229
queryJob.waitFor();
220230

231+
TableInfo expiry =
232+
bigquery()
233+
.getTable(queryJobConfig.getDestinationTable())
234+
.toBuilder()
235+
.setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS)
236+
.build();
237+
bigquery().update(expiry);
238+
221239
return queryJob;
222240
}
223241

@@ -248,10 +266,18 @@ private FieldValueList getTimestampLimits(String entityTableName) {
248266
QueryJobConfiguration getTimestampLimitsQuery =
249267
QueryJobConfiguration.newBuilder(createTimestampLimitQuery(entityTableName))
250268
.setDefaultDataset(DatasetId.of(projectId(), datasetId()))
269+
.setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName()))
251270
.build();
252271
try {
253272
Job job = bigquery().create(JobInfo.of(getTimestampLimitsQuery));
254273
TableResult getTimestampLimitsQueryResult = job.waitFor().getQueryResults();
274+
TableInfo expiry =
275+
bigquery()
276+
.getTable(getTimestampLimitsQuery.getDestinationTable())
277+
.toBuilder()
278+
.setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS)
279+
.build();
280+
bigquery().update(expiry);
255281
FieldValueList result = null;
256282
for (FieldValueList fields : getTimestampLimitsQueryResult.getValues()) {
257283
result = fields;

serving/src/main/java/feast/serving/store/bigquery/SubqueryCallable.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616
*/
1717
package feast.serving.store.bigquery;
1818

19+
import static feast.serving.service.BigQueryServingService.TEMP_TABLE_EXPIRY_DURATION_MS;
1920
import static feast.serving.store.bigquery.QueryTemplater.generateFullTableName;
2021

2122
import com.google.auto.value.AutoValue;
23+
import com.google.cloud.bigquery.BigQuery;
2224
import com.google.cloud.bigquery.BigQueryException;
2325
import com.google.cloud.bigquery.Job;
2426
import com.google.cloud.bigquery.QueryJobConfiguration;
2527
import com.google.cloud.bigquery.TableId;
28+
import com.google.cloud.bigquery.TableInfo;
2629
import feast.serving.store.bigquery.model.FeatureSetInfo;
2730
import java.util.concurrent.Callable;
2831

@@ -33,6 +36,8 @@
3336
@AutoValue
3437
public abstract class SubqueryCallable implements Callable<FeatureSetInfo> {
3538

39+
public abstract BigQuery bigquery();
40+
3641
public abstract FeatureSetInfo featureSetInfo();
3742

3843
public abstract Job subqueryJob();
@@ -44,6 +49,8 @@ public static Builder builder() {
4449
@AutoValue.Builder
4550
public abstract static class Builder {
4651

52+
public abstract Builder setBigquery(BigQuery bigquery);
53+
4754
public abstract Builder setFeatureSetInfo(FeatureSetInfo featureSetInfo);
4855

4956
public abstract Builder setSubqueryJob(Job subqueryJob);
@@ -57,6 +64,15 @@ public FeatureSetInfo call() throws BigQueryException, InterruptedException {
5764
subqueryJob().waitFor();
5865
subqueryConfig = subqueryJob().getConfiguration();
5966
TableId destinationTable = subqueryConfig.getDestinationTable();
67+
68+
TableInfo expiry =
69+
bigquery()
70+
.getTable(destinationTable)
71+
.toBuilder()
72+
.setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS)
73+
.build();
74+
bigquery().update(expiry);
75+
6076
String fullTablePath = generateFullTableName(destinationTable);
6177

6278
return new FeatureSetInfo(featureSetInfo(), fullTablePath);

tests/e2e/bq-batch-retrieval.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
from google.protobuf.duration_pb2 import Duration
1919
from pandavro import to_avro
2020

21+
pd.set_option('display.max_columns', None)
22+
2123
PROJECT_NAME = 'batch_' + uuid.uuid4().hex.upper()[0:6]
2224

25+
2326
@pytest.fixture(scope="module")
2427
def core_url(pytestconfig):
2528
return pytestconfig.getoption("core_url")
@@ -319,8 +322,8 @@ def test_multiple_featureset_joins(client):
319322
feature_retrieval_job = client.get_batch_features(
320323
entity_rows=entity_df, feature_refs=[f"{PROJECT_NAME}/feature_value6:1", f"{PROJECT_NAME}/other_feature_value7:1"]
321324
)
322-
output = feature_retrieval_job.to_dataframe()
323-
print(output.head())
325+
output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"])
326+
print(output.head(10))
324327

325328
assert output["entity_id"].to_list() == [int(i) for i in output["feature_value6"].to_list()]
326329
assert output["other_entity_id"].to_list() == output["other_feature_value7"].to_list()

0 commit comments

Comments
 (0)