The Apache Flink 2.1 does not support mongodb python connectors. So I make the sample python codes by using SinkFunction.
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import SinkFunction
from pymongo import MongoClient
import json
class MongoSink(SinkFunction):
def __init__(self, uri, database, collection):
self._uri = uri
self._db = database
self._coll = collection
self._client = None
def open(self, runtime_context):
self._client = MongoClient(self._uri)
self.collection = self._client[self._db][self._coll]
def invoke(self, value, context):
doc = value
if isinstance(value, str):
doc = json.loads(value)
self.collection.insert_one(doc)
def close(self):
if self._client:
self._client.close()
def main():
env = StreamExecutionEnvironment.get_execution_environment()
env.add_jars('file:///home/joseph/flink/jars/flink-connector-mongodb-2.0.0-1.20.jar'
,'file:///home/joseph/flink/jars/flink-connector-mongodb-cdc-3.0.1.jar')
# your data stream
ds = env.from_collection([
'{"_id":1, "name":"Alice"}',
'{"_id":2, "name":"Bob"}'
])
ds.add_sink(MongoSink(
uri="mongodb://user:[email protected]:27017",
database="my_db",
collection="my_coll"
))
env.execute("PyFlink MongoDB")
if __name__ == "__main__":
main()
But the exceptions are thrown from Sink class.
Traceback (most recent call last):
File "/home/joseph/VSCode_Workspace/etl-stream-python/com/aaa/etl/etl_data_uploader_mysql.py", line 78, in <module>
main()
File "/home/joseph/VSCode_Workspace/etl-stream-python/com/aaa/etl/etl_data_uploader_mysql.py", line 70, in main
ds.add_sink(MongoSink(
File "/home/joseph/VSCode_Workspace/.venv-etl/lib/python3.11/site-packages/pyflink/datastream/data_stream.py", line 819, in add_sink
return DataStreamSink(self._j_data_stream.addSink(sink_func.get_java_function()))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/joseph/VSCode_Workspace/.venv-etl/lib/python3.11/site-packages/pyflink/datastream/functions.py", line 586, in get_java_function
return self._j_function
^^^^^^^^^^^^^^^^
AttributeError: 'MongoSink' object has no attribute '_j_function'
I want to know if I can make sink class with pyflink 2.1 or not. Kindly inform me the python MongoDB sink class example codes.
_j_functionand you may have to create it. But first you have to find information what it has to be.super().__init__()indef __init__()and this can make problem, See source code for class FlinkKafkaProducerBase(SinkFunction, ABC):