17 | 17 |
18 | 18 | package feast.ingestion.transform; |
19 | 19 |
20 | | -import com.google.common.base.Preconditions; |
21 | | -import com.google.common.base.Strings; |
| 20 | +import static com.google.common.base.Preconditions.checkArgument; |
| 21 | + |
22 | 22 | import feast.ingestion.deserializer.FeatureRowDeserializer; |
23 | 23 | import feast.ingestion.deserializer.FeatureRowKeyDeserializer; |
| 24 | +import feast.options.Options; |
24 | 25 | import feast.options.OptionsParser; |
25 | 26 | import feast.specs.ImportSpecProto.ImportSpec; |
26 | 27 | import feast.types.FeatureRowProto.FeatureRow; |
27 | 28 | import feast.types.FeatureRowProto.FeatureRowKey; |
| 29 | +import java.util.ArrayList; |
| 30 | +import java.util.Arrays; |
| 31 | +import java.util.List; |
| 32 | +import javax.validation.constraints.NotEmpty; |
28 | 33 | import org.apache.beam.sdk.io.kafka.KafkaIO; |
29 | 34 | import org.apache.beam.sdk.io.kafka.KafkaRecord; |
30 | 35 | import org.apache.beam.sdk.transforms.DoFn; |
31 | 36 | import org.apache.beam.sdk.transforms.ParDo; |
32 | 37 | import org.apache.beam.sdk.values.PCollection; |
33 | 38 | import org.apache.beam.sdk.values.PInput; |
34 | 39 |
35 | | -import java.util.ArrayList; |
36 | | -import java.util.Arrays; |
37 | | -import java.util.List; |
38 | | - |
39 | | -import static com.google.common.base.Preconditions.checkArgument; |
40 | | - |
41 | 40 | public class FeatureRowKafkaIO { |
| 41 | + static final String KAFKA_TYPE = "kafka"; |
42 | 42 |
43 | | - static final String KAFKA_TYPE = "kafka"; |
| 43 | + /** |
|  44 | +   * Transform for reading {@link feast.types.FeatureRowProto.FeatureRow FeatureRow} proto messages |
|  45 | +   * from one or more Kafka topics. |
| 46 | + */ |
| 47 | + public static Read read(ImportSpec importSpec) { |
| 48 | + return new Read(importSpec); |
| 49 | + } |
44 | 50 |
| 51 | + public static class KafkaReadOptions implements Options { |
|  52 | +    @NotEmpty public String server; // Kafka bootstrap server(s) |
|  53 | +    @NotEmpty public String topics; // comma-separated list of Kafka topics |
| 54 | + } |
45 | 55 |
46 | | - /** |
47 | | - * Transform for reading {@link feast.types.FeatureRowProto.FeatureRow FeatureRow} |
48 | | - * proto messages from kafka one or more kafka topics. |
49 | | - * |
50 | | - */ |
51 | | - public static Read read(ImportSpec importSpec) { |
52 | | - return new Read(importSpec); |
53 | | - } |
| 56 | + public static class Read extends FeatureIO.Read { |
54 | 57 |
55 | | - public static class Read extends FeatureIO.Read { |
| 58 | + private ImportSpec importSpec; |
56 | 59 |
57 | | - private ImportSpec importSpec; |
58 | | - |
59 | | - private Read(ImportSpec importSpec) { |
60 | | - this.importSpec = importSpec; |
61 | | - } |
62 | | - |
63 | | - @Override |
64 | | - public PCollection<FeatureRow> expand(PInput input) { |
65 | | - |
66 | | - checkArgument(importSpec.getType().equals(KAFKA_TYPE)); |
67 | | - |
68 | | - String bootstrapServer = importSpec.getOptionsMap().get("server"); |
69 | | - |
70 | | - Preconditions.checkArgument( |
71 | | - !Strings.isNullOrEmpty(bootstrapServer), "kafka bootstrap server must be set"); |
72 | | - |
73 | | - String topics = importSpec.getOptionsMap().get("topics"); |
74 | | - |
75 | | - Preconditions.checkArgument( |
76 | | - !Strings.isNullOrEmpty(topics), "kafka topic(s) must be set"); |
77 | | - |
78 | | - List<String> topicsList = new ArrayList<>(Arrays.asList(topics.split(","))); |
79 | | - |
80 | | - KafkaIO.Read<FeatureRowKey, FeatureRow> kafkaIOReader = KafkaIO.<FeatureRowKey, FeatureRow>read() |
81 | | - .withBootstrapServers(bootstrapServer) |
82 | | - .withTopics(topicsList) |
83 | | - .withKeyDeserializer(FeatureRowKeyDeserializer.class) |
84 | | - .withValueDeserializer(FeatureRowDeserializer.class); |
85 | | - |
86 | | - PCollection<KafkaRecord<FeatureRowKey, FeatureRow>> featureRowRecord = input.getPipeline().apply(kafkaIOReader); |
| 60 | + private Read(ImportSpec importSpec) { |
| 61 | + this.importSpec = importSpec; |
| 62 | + } |
87 | 63 |
88 | | - PCollection<FeatureRow> featureRow = featureRowRecord.apply( |
89 | | - ParDo.of( |
90 | | - new DoFn<KafkaRecord<FeatureRowKey, FeatureRow>, FeatureRow>() { |
91 | | - @ProcessElement |
92 | | - public void processElement(ProcessContext processContext) { |
93 | | - KafkaRecord<FeatureRowKey, FeatureRow> record = processContext.element(); |
94 | | - processContext.output(record.getKV().getValue()); |
95 | | - } |
96 | | - })); |
97 | | - return featureRow; |
98 | | - } |
| 64 | + @Override |
| 65 | + public PCollection<FeatureRow> expand(PInput input) { |
| 66 | + |
| 67 | + checkArgument(importSpec.getType().equals(KAFKA_TYPE)); |
| 68 | + |
| 69 | + KafkaReadOptions options = |
| 70 | + OptionsParser.parse(importSpec.getOptionsMap(), KafkaReadOptions.class); |
| 71 | + |
| 72 | + List<String> topicsList = new ArrayList<>(Arrays.asList(options.topics.split(","))); |
| 73 | + |
| 74 | + KafkaIO.Read<FeatureRowKey, FeatureRow> kafkaIOReader = |
| 75 | + KafkaIO.<FeatureRowKey, FeatureRow>read() |
| 76 | + .withBootstrapServers(options.server) |
| 77 | + .withTopics(topicsList) |
| 78 | + .withKeyDeserializer(FeatureRowKeyDeserializer.class) |
| 79 | + .withValueDeserializer(FeatureRowDeserializer.class); |
| 80 | + |
| 81 | + PCollection<KafkaRecord<FeatureRowKey, FeatureRow>> featureRowRecord = |
| 82 | + input.getPipeline().apply(kafkaIOReader); |
| 83 | + |
| 84 | + PCollection<FeatureRow> featureRow = |
| 85 | + featureRowRecord.apply( |
| 86 | + ParDo.of( |
| 87 | + new DoFn<KafkaRecord<FeatureRowKey, FeatureRow>, FeatureRow>() { |
| 88 | + @ProcessElement |
| 89 | + public void processElement(ProcessContext processContext) { |
| 90 | + KafkaRecord<FeatureRowKey, FeatureRow> record = processContext.element(); |
| 91 | + processContext.output(record.getKV().getValue()); |
| 92 | + } |
| 93 | + })); |
| 94 | + return featureRow; |
99 | 95 | } |
| 96 | + } |
100 | 97 | } |
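
For context, a minimal usage sketch (not part of the diff above): it builds an `ImportSpec` whose options map carries the `server` and `topics` keys consumed by `KafkaReadOptions`, then applies the new transform at the root of a Beam pipeline. The `setType`/`putOptions` builder calls are assumed to be the standard protobuf-generated counterparts of the `getType()`/`getOptionsMap()` accessors used in `expand()`; the broker address and topic names are placeholders.

```java
import feast.ingestion.transform.FeatureRowKafkaIO;
import feast.specs.ImportSpecProto.ImportSpec;
import feast.types.FeatureRowProto.FeatureRow;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.values.PCollection;

public class FeatureRowKafkaIOExample {
  public static void main(String[] args) {
    // Option keys mirror the fields declared in KafkaReadOptions.
    ImportSpec importSpec =
        ImportSpec.newBuilder()
            .setType("kafka")                                   // must equal KAFKA_TYPE
            .putOptions("server", "localhost:9092")             // Kafka bootstrap server(s)
            .putOptions("topics", "feature-rows-a,feature-rows-b") // comma-separated topics
            .build();

    Pipeline pipeline = Pipeline.create();

    // Read.expand() checks the import type, parses KafkaReadOptions from the options map,
    // reads KafkaRecord<FeatureRowKey, FeatureRow> via KafkaIO, and emits the FeatureRow values.
    PCollection<FeatureRow> featureRows = pipeline.apply(FeatureRowKafkaIO.read(importSpec));

    pipeline.run();
  }
}
```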