I want to fetch the flowfiles from the input stream in bulk and create a single bulk INSERT query for ClickHouse.
Currently I am creating one INSERT query per MongoDB document. Here is my current code:
import groovy.json.JsonSlurper
import java.nio.charset.StandardCharsets
import org.apache.nifi.processor.io.StreamCallback
import org.apache.commons.io.IOUtils
/**
 * NiFi StreamCallback that reads one MongoDB document (JSON) from the
 * incoming FlowFile and rewrites the FlowFile content as a single
 * ClickHouse INSERT statement.
 */
class GenerateInsertQuery implements StreamCallback {

    /**
     * Convert a MongoDB ISO-8601 date string to a ClickHouse-compatible
     * "yyyy-MM-dd HH:mm:ss" DateTime string.
     *
     * @param mongoDate date string as exported by MongoDB (with or without millis)
     * @return reformatted date, or null when input is empty or matches no known format
     */
    String toClickhouseDateFormat(String mongoDate) {
        if (!mongoDate) return null
        def formats = ["yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd'T'HH:mm:ss'Z'"]
        // Plain for-loop so the first successful parse returns immediately.
        // (A 'return' inside .each { } only exits the closure, not the
        // iteration, so the original kept trying the remaining formats.)
        for (format in formats) {
            try {
                return Date.parse(format, mongoDate).format("yyyy-MM-dd HH:mm:ss")
            } catch (Exception ignored) {
                // this format did not match — try the next one
            }
        }
        // None of the known formats matched
        return null
    }

    /**
     * Escape single quotes for use inside a SQL string literal.
     * Returns null for null/blank input so the caller can emit SQL NULL.
     *
     * NOTE(review): quote-doubling alone is not a complete SQL-injection
     * defense — prefer parameterized inserts (e.g. via a JDBC batch) where possible.
     */
    String sanitizeValue(String value) {
        // trim().isEmpty() already covers the empty string, so no extra == '' check
        if (value == null || value.trim().isEmpty()) return null
        return value.replace("'", "''")
    }

    @Override
    void process(InputStream inputStream, OutputStream outputStream) {
        // Read the JSON content from the FlowFile
        def json_content = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        def json_object = new JsonSlurper().parseText(json_content)

        // Extract and sanitize values, using the null-safe operator
        def id_value   = sanitizeValue(json_object?._id?.'$oid')
        def first_name = sanitizeValue(json_object?.first_name)
        def last_name  = sanitizeValue(json_object?.last_name)

        // Build the SQL INSERT query; a null value interpolates as the
        // literal text "null", which ClickHouse accepts as SQL NULL.
        def insert_query = """INSERT INTO db.table_name (comma separated columns names)
VALUES (
${id_value != null ? "'${id_value}'" : null},
${first_name != null ? "'${first_name}'" : null},
${last_name != null ? "'${last_name}'" : null}
);"""

        // Write the SQL query back to the FlowFile
        outputStream.write(insert_query.getBytes(StandardCharsets.UTF_8))
    }
}
// Apply the StreamCallback to rewrite the FlowFile content as a SQL INSERT
flowFile = session.get()
if (flowFile != null) {
    try {
        flowFile = session.write(flowFile, new GenerateInsertQuery())
        session.transfer(flowFile, REL_SUCCESS)
    } catch (Exception e) {
        // Log the cause before routing to failure (the original swallowed 'e')
        log.error('Failed to generate INSERT query from FlowFile content', e)
        session.transfer(flowFile, REL_FAILURE)
    }
}
// When session.get() returns null there is no FlowFile to process, so do
// nothing this invocation. The original transferred the null flowFile to
// REL_FAILURE, which throws an exception inside NiFi's session.transfer.
How can I modify this script so that NiFi builds a single bulk insertion query? If anyone knows, kindly adapt this script accordingly.
Regarding the `INSERT INTO ...` statement — are you using some kind of JDBC driver for MongoDB? With JDBC you could use the `addBatch` and `executeBatch` functions; I can give you a snippet for your case.