Skip to content

Commit ce63d63

Browse files
committed
Update JobCoordinatorService to support duplicate source objects.
Changes: * Add Source Repository method to query source by type and config * Add getSurogateSource() get surogate source object for same source type and config. * Add test to check that JobCoordinatorService will handle duplicate objects correctly
1 parent 51f7d21 commit ce63d63

File tree

4 files changed

+171
-14
lines changed

4 files changed

+171
-14
lines changed

core/src/main/java/feast/core/dao/SourceRepository.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
package feast.core.dao;
1818

1919
import feast.core.model.Source;
20+
import feast.proto.core.SourceProto.SourceType;
2021
import org.springframework.data.jpa.repository.JpaRepository;
2122

2223
/** JPA repository supplying Source objects keyed by id. */
23-
public interface SourceRepository extends JpaRepository<Source, String> {}
24+
public interface SourceRepository extends JpaRepository<Source, String> {
25+
Source findFirstByTypeAndConfigOrderByIdAsc(SourceType type, String config);
26+
}

core/src/main/java/feast/core/model/Source.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ public SourceProto.Source toProto() {
136136
}
137137

138138
/**
139-
* Override equality for sources.
140-
* Sources are compared based on their type and type-specific options.
139+
* Override equality for sources. Sources are compared based on their type and type-specific
140+
* options.
141141
*
142142
* @param other other Source
143143
* @return boolean equal

core/src/main/java/feast/core/service/JobCoordinatorService.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import feast.core.config.FeastProperties.JobProperties;
2222
import feast.core.dao.FeatureSetRepository;
2323
import feast.core.dao.JobRepository;
24+
import feast.core.dao.SourceRepository;
2425
import feast.core.job.JobManager;
2526
import feast.core.job.JobUpdateTask;
2627
import feast.core.model.FeatureSet;
@@ -55,6 +56,7 @@ public class JobCoordinatorService {
5556

5657
private final JobRepository jobRepository;
5758
private final FeatureSetRepository featureSetRepository;
59+
private final SourceRepository sourceRepository;
5860
private final SpecService specService;
5961
private final JobManager jobManager;
6062
private final JobProperties jobProperties;
@@ -63,11 +65,13 @@ public class JobCoordinatorService {
6365
public JobCoordinatorService(
6466
JobRepository jobRepository,
6567
FeatureSetRepository featureSetRepository,
68+
SourceRepository sourceRepository,
6669
SpecService specService,
6770
JobManager jobManager,
6871
FeastProperties feastProperties) {
6972
this.jobRepository = jobRepository;
7073
this.featureSetRepository = featureSetRepository;
74+
this.sourceRepository = sourceRepository;
7175
this.specService = specService;
7276
this.jobManager = jobManager;
7377
this.jobProperties = feastProperties.getJobs();
@@ -108,6 +112,11 @@ public void Poll() throws InvalidProtocolBufferException {
108112
.collect(Collectors.groupingBy(FeatureSet::getSource))
109113
.forEach(
110114
(source, setsForSource) -> {
115+
// Sources with same type and config in different Feature Sets are different
116+
// objects.
117+
// Make sure that we are dealing with the same source object when spawning jobs.
118+
source = getSurogateSource(source);
119+
111120
Optional<Job> originalJob = getJob(source, store);
112121
jobUpdateTasks.add(
113122
new JobUpdateTask(
@@ -189,4 +198,14 @@ public Optional<Job> getJob(Source source, Store store) {
189198
// return the latest
190199
return Optional.of(jobs.get(0));
191200
}
201+
202+
/**
203+
* Get the surogate source for the given source. Multiple source objects with different ids can
204+
* share the same source type and config. This returns the definitive source object for sources
205+
* with the same type and config.
206+
*/
207+
private Source getSurogateSource(Source source) {
208+
return sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
209+
source.getType(), source.getConfig());
210+
}
192211
}

core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java

Lines changed: 146 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
import feast.core.model.FeatureSet;
3939
import feast.core.model.Job;
4040
import feast.core.model.JobStatus;
41+
import feast.core.model.Source;
42+
import feast.core.model.Store;
4143
import feast.proto.core.CoreServiceProto.ListFeatureSetsRequest.Filter;
4244
import feast.proto.core.CoreServiceProto.ListFeatureSetsResponse;
4345
import feast.proto.core.CoreServiceProto.ListStoresResponse;
@@ -86,7 +88,12 @@ public void shouldDoNothingIfNoStoresFound() throws InvalidProtocolBufferExcepti
8688
when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build());
8789
JobCoordinatorService jcs =
8890
new JobCoordinatorService(
89-
jobRepository, featureSetRepository, sourceRepository, specService, jobManager, feastProperties);
91+
jobRepository,
92+
featureSetRepository,
93+
sourceRepository,
94+
specService,
95+
jobManager,
96+
feastProperties);
9097
jcs.Poll();
9198
verify(jobRepository, times(0)).saveAndFlush(any());
9299
}
@@ -107,7 +114,12 @@ public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol
107114
.thenReturn(ListFeatureSetsResponse.newBuilder().build());
108115
JobCoordinatorService jcs =
109116
new JobCoordinatorService(
110-
jobRepository, featureSetRepository, sourceRepository, specService, jobManager, feastProperties);
117+
jobRepository,
118+
featureSetRepository,
119+
sourceRepository,
120+
specService,
121+
jobManager,
122+
feastProperties);
111123
jcs.Poll();
112124
verify(jobRepository, times(0)).saveAndFlush(any());
113125
}
@@ -130,6 +142,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep
130142
.setBootstrapServers("servers:9092")
131143
.build())
132144
.build();
145+
Source source = Source.fromProto(sourceSpec);
133146

134147
FeatureSetProto.FeatureSet featureSetSpec1 =
135148
FeatureSetProto.FeatureSet.newBuilder()
@@ -159,7 +172,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep
159172
"",
160173
"",
161174
Runner.DATAFLOW,
162-
feast.core.model.Source.fromProto(sourceSpec),
175+
source,
163176
feast.core.model.Store.fromProto(storeSpec),
164177
Arrays.asList(featureSet1, featureSet2),
165178
JobStatus.PENDING);
@@ -169,13 +182,16 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep
169182
"some_id",
170183
extId,
171184
Runner.DATAFLOW,
172-
feast.core.model.Source.fromProto(sourceSpec),
185+
source,
173186
feast.core.model.Store.fromProto(storeSpec),
174187
Arrays.asList(featureSet1, featureSet2),
175188
JobStatus.RUNNING);
176189

177190
when(featureSetRepository.findAllByNameLikeAndProject_NameLikeOrderByNameAsc("%", "project1"))
178191
.thenReturn(Lists.newArrayList(featureSet1, featureSet2));
192+
when(sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
193+
source.getType(), source.getConfig()))
194+
.thenReturn(source);
179195
when(specService.listStores(any()))
180196
.thenReturn(ListStoresResponse.newBuilder().addStore(storeSpec).build());
181197

@@ -184,7 +200,12 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep
184200

185201
JobCoordinatorService jcs =
186202
new JobCoordinatorService(
187-
jobRepository, featureSetRepository, sourceRepository, specService, jobManager, feastProperties);
203+
jobRepository,
204+
featureSetRepository,
205+
sourceRepository,
206+
specService,
207+
jobManager,
208+
feastProperties);
188209
jcs.Poll();
189210
verify(jobRepository, times(1)).saveAll(jobArgCaptor.capture());
190211
List<Job> actual = jobArgCaptor.getValue();
@@ -209,6 +230,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
209230
.setBootstrapServers("servers:9092")
210231
.build())
211232
.build();
233+
Source source1 = Source.fromProto(source1Spec);
212234
SourceProto.Source source2Spec =
213235
SourceProto.Source.newBuilder()
214236
.setType(SourceType.KAFKA)
@@ -218,6 +240,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
218240
.setBootstrapServers("other.servers:9092")
219241
.build())
220242
.build();
243+
Source source2 = Source.fromProto(source2Spec);
221244

222245
FeatureSetProto.FeatureSet featureSetSpec1 =
223246
FeatureSetProto.FeatureSet.newBuilder()
@@ -246,7 +269,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
246269
"name1",
247270
"",
248271
Runner.DATAFLOW,
249-
feast.core.model.Source.fromProto(source1Spec),
272+
source1,
250273
feast.core.model.Store.fromProto(storeSpec),
251274
Arrays.asList(featureSet1),
252275
JobStatus.PENDING);
@@ -256,7 +279,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
256279
"name1",
257280
"extId1",
258281
Runner.DATAFLOW,
259-
feast.core.model.Source.fromProto(source1Spec),
282+
source1,
260283
feast.core.model.Store.fromProto(storeSpec),
261284
Arrays.asList(featureSet1),
262285
JobStatus.RUNNING);
@@ -266,7 +289,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
266289
"",
267290
"extId2",
268291
Runner.DATAFLOW,
269-
feast.core.model.Source.fromProto(source2Spec),
292+
source2,
270293
feast.core.model.Store.fromProto(storeSpec),
271294
Arrays.asList(featureSet2),
272295
JobStatus.PENDING);
@@ -276,15 +299,20 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
276299
"name2",
277300
"extId2",
278301
Runner.DATAFLOW,
279-
feast.core.model.Source.fromProto(source2Spec),
302+
source2,
280303
feast.core.model.Store.fromProto(storeSpec),
281304
Arrays.asList(featureSet2),
282305
JobStatus.RUNNING);
283306
ArgumentCaptor<List<Job>> jobArgCaptor = ArgumentCaptor.forClass(List.class);
284307

285308
when(featureSetRepository.findAllByNameLikeAndProject_NameLikeOrderByNameAsc("%", "project1"))
286309
.thenReturn(Lists.newArrayList(featureSet1, featureSet2));
287-
310+
when(sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
311+
source1.getType(), source1.getConfig()))
312+
.thenReturn(source1);
313+
when(sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
314+
source2.getType(), source2.getConfig()))
315+
.thenReturn(source2);
288316
when(specService.listStores(any()))
289317
.thenReturn(ListStoresResponse.newBuilder().addStore(storeSpec).build());
290318

@@ -294,7 +322,12 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
294322

295323
JobCoordinatorService jcs =
296324
new JobCoordinatorService(
297-
jobRepository, featureSetRepository, sourceRepository, specService, jobManager, feastProperties);
325+
jobRepository,
326+
featureSetRepository,
327+
sourceRepository,
328+
specService,
329+
jobManager,
330+
feastProperties);
298331
jcs.Poll();
299332

300333
verify(jobRepository, times(1)).saveAll(jobArgCaptor.capture());
@@ -303,4 +336,106 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException {
303336
assertThat(actual.get(0), equalTo(expected1));
304337
assertThat(actual.get(1), equalTo(expected2));
305338
}
339+
340+
@Test
341+
public void shouldGroupJobsBySourceAndIgnoreDuplicateSourceObjects()
342+
throws InvalidProtocolBufferException {
343+
StoreProto.Store storeSpec =
344+
StoreProto.Store.newBuilder()
345+
.setName("test")
346+
.setType(StoreType.REDIS)
347+
.setRedisConfig(RedisConfig.newBuilder().build())
348+
.addSubscriptions(Subscription.newBuilder().setProject("project1").setName("*").build())
349+
.build();
350+
Store store = Store.fromProto(storeSpec);
351+
352+
// simulate duplicate source objects: create source objects from the same spec but with
353+
// different ids
354+
SourceProto.Source sourceSpec =
355+
SourceProto.Source.newBuilder()
356+
.setType(SourceType.KAFKA)
357+
.setKafkaSourceConfig(
358+
KafkaSourceConfig.newBuilder()
359+
.setTopic("topic")
360+
.setBootstrapServers("servers:9092")
361+
.build())
362+
.build();
363+
Source source1 = Source.fromProto(sourceSpec);
364+
source1.setId(1);
365+
Source source2 = Source.fromProto(sourceSpec);
366+
source2.setId(2);
367+
368+
FeatureSetProto.FeatureSet featureSetSpec1 =
369+
FeatureSetProto.FeatureSet.newBuilder()
370+
.setSpec(
371+
FeatureSetSpec.newBuilder()
372+
.setSource(sourceSpec)
373+
.setProject("project1")
374+
.setName("features1"))
375+
.setMeta(FeatureSetMeta.newBuilder())
376+
.build();
377+
FeatureSet featureSet1 = FeatureSet.fromProto(featureSetSpec1);
378+
featureSet1.setSource(source1);
379+
380+
FeatureSetProto.FeatureSet featureSetSpec2 =
381+
FeatureSetProto.FeatureSet.newBuilder()
382+
.setSpec(
383+
FeatureSetSpec.newBuilder()
384+
.setSource(sourceSpec)
385+
.setProject("project1")
386+
.setName("features2"))
387+
.setMeta(FeatureSetMeta.newBuilder())
388+
.build();
389+
FeatureSet featureSet2 = FeatureSet.fromProto(featureSetSpec2);
390+
featureSet2.setSource(source2);
391+
392+
Job expectedInput =
393+
new Job(
394+
"",
395+
"",
396+
Runner.DATAFLOW,
397+
source1,
398+
store,
399+
List.of(featureSet1, featureSet2),
400+
JobStatus.PENDING);
401+
Job expected =
402+
new Job(
403+
"name",
404+
"extid",
405+
Runner.DATAFLOW,
406+
source1,
407+
store,
408+
List.of(featureSet1, featureSet2),
409+
JobStatus.RUNNING);
410+
411+
when(featureSetRepository.findAllByNameLikeAndProject_NameLikeOrderByNameAsc("%", "project1"))
412+
.thenReturn(Lists.newArrayList(featureSet1, featureSet2));
413+
when(sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
414+
source1.getType(), source1.getConfig()))
415+
.thenReturn(source1);
416+
when(sourceRepository.findFirstByTypeAndConfigOrderByIdAsc(
417+
source2.getType(), source2.getConfig()))
418+
.thenReturn(source1);
419+
420+
when(specService.listStores(any()))
421+
.thenReturn(ListStoresResponse.newBuilder().addStore(storeSpec).build());
422+
423+
when(jobManager.startJob(argThat(new JobMatcher(expectedInput)))).thenReturn(expected);
424+
when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW);
425+
426+
ArgumentCaptor<List<Job>> jobArgCaptor = ArgumentCaptor.forClass(List.class);
427+
428+
JobCoordinatorService jcs =
429+
new JobCoordinatorService(
430+
jobRepository,
431+
featureSetRepository,
432+
sourceRepository,
433+
specService,
434+
jobManager,
435+
feastProperties);
436+
jcs.Poll();
437+
verify(jobRepository, times(1)).saveAll(jobArgCaptor.capture());
438+
List<Job> actual = jobArgCaptor.getValue();
439+
assertThat(actual, equalTo(Collections.singletonList(expected)));
440+
}
306441
}

0 commit comments

Comments
 (0)