Skip to content

Commit 373c3eb

Browse files
authored
[INLONG-11961][SDK] Transform supports array index access, the WHERE clause supports the LIKE operator, and the str_to_json function converts KV-format data into JSON format (#11962)
1 parent 28759a9 commit 373c3eb

File tree

4 files changed

+346
-0
lines changed

4 files changed

+346
-0
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.inlong.sdk.transform.process.function.string;
19+
20+
import org.apache.inlong.sdk.transform.decode.SourceData;
21+
import org.apache.inlong.sdk.transform.process.Context;
22+
import org.apache.inlong.sdk.transform.process.function.FunctionConstant;
23+
import org.apache.inlong.sdk.transform.process.function.TransformFunction;
24+
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
25+
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
26+
27+
import com.google.gson.JsonObject;
28+
import net.sf.jsqlparser.expression.Expression;
29+
import net.sf.jsqlparser.expression.Function;
30+
31+
import java.util.List;
32+
/**
33+
* StrToJsonFunction -> str_to_json(str, pairDelimiter, keyValueDelimiter)
34+
* description:
35+
* - Return NULL if 'str' is NULL
36+
* - Return a json string after splitting the 'str' into key/value pairs using 'pairDelimiter'(default is ',')
37+
* and 'keyValueDelimiter'(default is '=')
38+
* Note: Both 'pairDelimiter' and 'keyValueDelimiter' are treated as regular expressions.So special characters
39+
* (e.g. <([{^-=$!|]})?*+.>) need to be properly escaped before using as a delimiter literally.
40+
*/
41+
@TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
42+
"str_to_json"}, parameter = "(String s1, String pairDelimiter, String keyValueDelimiter)", descriptions = {
43+
"- Return \"\" if 'str' is NULL;",
44+
"- Return a json string after splitting the 'str' into key/value pairs using 'pairDelimiter'(default is ',') "
45+
+
46+
"and 'keyValueDelimiter'(default is '=');",
47+
"Note: Both 'pairDelimiter' and 'keyValueDelimiter' are treated as regular expressions.So special " +
48+
"characters(e.g. <([{^-=$!|]})?*+.>) need to be properly escaped before using as a delimiter literally."
49+
}, examples = {
50+
"str_to_json('key1=value1,key2=value2,key3=value3') = {\"key1\":\"value1\",\"key2\":\"value2\",\"key3\"=\"value3\"}",
51+
"str_to_json(\"name->John!age->30!city->China\" , \"!\" , \"->\") = {\"name\":\"John\",\"age\":\"30\",\"city\":\"China\"}"
52+
})
53+
public class StrToJsonFunction implements ValueParser {
54+
55+
private ValueParser inputParser;
56+
57+
private ValueParser pairDelimiterParser;
58+
59+
private ValueParser kvDelimiterParser;
60+
61+
public StrToJsonFunction(Function expr) {
62+
List<Expression> expressions = expr.getParameters().getExpressions();
63+
if (!expressions.isEmpty()) {
64+
inputParser = OperatorTools.buildParser(expressions.get(0));
65+
if (expressions.size() >= 2) {
66+
pairDelimiterParser = OperatorTools.buildParser(expressions.get(1));
67+
if (expressions.size() >= 3) {
68+
kvDelimiterParser = OperatorTools.buildParser(expressions.get(2));
69+
}
70+
}
71+
}
72+
}
73+
74+
@Override
75+
public Object parse(SourceData sourceData, int rowIndex, Context context) {
76+
Object inputStringObj = inputParser.parse(sourceData, rowIndex, context);
77+
Object pairDelimiterStringObj = null;
78+
String pairDelimiterString = null;
79+
if (pairDelimiterParser != null) {
80+
pairDelimiterStringObj = pairDelimiterParser.parse(sourceData, rowIndex, context);
81+
pairDelimiterString = OperatorTools.parseString(pairDelimiterStringObj);
82+
}
83+
Object kvDelimiterStringObj = null;
84+
String kvDelimiterString = null;
85+
if (kvDelimiterParser != null) {
86+
kvDelimiterStringObj = kvDelimiterParser.parse(sourceData, rowIndex, context);
87+
kvDelimiterString = OperatorTools.parseString(kvDelimiterStringObj);
88+
}
89+
String inputString = OperatorTools.parseString(inputStringObj);
90+
91+
return parse2Json(pairDelimiterString, kvDelimiterString, inputString);
92+
}
93+
94+
private JsonObject parse2Json(String pairDelimiterString, String kvDelimiterString,
95+
String inputString) {
96+
String pairDelimiter =
97+
(pairDelimiterString == null || pairDelimiterString.isEmpty()) ? "," : escapeRegex(pairDelimiterString);
98+
String keyValueDelimiter =
99+
(kvDelimiterString == null || kvDelimiterString.isEmpty()) ? "=" : escapeRegex(kvDelimiterString);
100+
101+
JsonObject json = new JsonObject();
102+
String[] pairs = inputString.split(pairDelimiter);
103+
104+
for (String pair : pairs) {
105+
if (pair.contains(keyValueDelimiter)) {
106+
String[] keyValue = pair.split(keyValueDelimiter, 2);
107+
json.addProperty(keyValue[0], keyValue[1]);
108+
}
109+
}
110+
return json;
111+
}
112+
113+
private String escapeRegex(String delimiter) {
114+
return delimiter.replaceAll("([\\\\^$|?*+\\[\\](){}])", "\\\\$1");
115+
}
116+
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.inlong.sdk.transform.process.operator;
19+
20+
import org.apache.inlong.common.util.StringUtil;
21+
import org.apache.inlong.sdk.transform.decode.SourceData;
22+
import org.apache.inlong.sdk.transform.process.Context;
23+
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
24+
25+
import lombok.extern.slf4j.Slf4j;
26+
import net.sf.jsqlparser.expression.operators.relational.LikeExpression;
27+
28+
import java.util.regex.Pattern;
29+
30+
/**
31+
* LikeOperator
32+
*
33+
*/
34+
@Slf4j
35+
@TransformOperator(values = LikeExpression.class)
36+
public class LikeOperator implements ExpressionOperator {
37+
38+
private final ValueParser destParser;
39+
private final ValueParser patternParser;
40+
private final ValueParser escapeParser;
41+
private final boolean isNot;
42+
private static final String REGEX_SPECIAL_CHAR = "[]()|^-+*?{}$\\.";
43+
44+
public LikeOperator(LikeExpression expr) {
45+
destParser = OperatorTools.buildParser(expr.getLeftExpression());
46+
patternParser = OperatorTools.buildParser(expr.getRightExpression());
47+
escapeParser = OperatorTools.buildParser(expr.getEscape());
48+
isNot = expr.isNot();
49+
}
50+
51+
private String buildLikeRegex(String pattern, char escapeChar) {
52+
int len = pattern.length();
53+
StringBuilder regexPattern = new StringBuilder(len + len);
54+
for (int i = 0; i < len; i++) {
55+
char c = pattern.charAt(i);
56+
if (REGEX_SPECIAL_CHAR.indexOf(c) >= 0) {
57+
regexPattern.append('\\');
58+
}
59+
if (c == escapeChar) {
60+
if (i == (pattern.length() - 1)) {
61+
// At the end of a string, the escape character represents itself
62+
regexPattern.append(c);
63+
continue;
64+
}
65+
char nextChar = pattern.charAt(i + 1);
66+
if (nextChar == '_' || nextChar == '%' || nextChar == escapeChar) {
67+
regexPattern.append(nextChar);
68+
i++;
69+
} else {
70+
throw new RuntimeException("Illegal pattern string");
71+
}
72+
} else if (c == '_') {
73+
regexPattern.append('.');
74+
} else if (c == '%') {
75+
regexPattern.append("(?s:.*)");
76+
} else {
77+
regexPattern.append(c);
78+
}
79+
}
80+
return regexPattern.toString();
81+
}
82+
83+
/**
84+
* check
85+
* @param sourceData
86+
* @param rowIndex
87+
* @return
88+
*/
89+
@Override
90+
public boolean check(SourceData sourceData, int rowIndex, Context context) {
91+
Object destObj = destParser.parse(sourceData, rowIndex, context);
92+
Object patternObj = patternParser.parse(sourceData, rowIndex, context);
93+
if (destObj == null || patternObj == null) {
94+
return false;
95+
}
96+
char escapeChr = '\\';
97+
if (escapeParser != null) {
98+
Object escapeObj = this.escapeParser.parse(sourceData, rowIndex, context);
99+
if (!StringUtil.isEmpty(escapeObj)) {
100+
escapeChr = escapeObj.toString().charAt(0);
101+
}
102+
}
103+
String destStr = destObj.toString();
104+
String pattern = patternObj.toString();
105+
try {
106+
final String regex = buildLikeRegex(pattern, escapeChr);
107+
boolean isMatch = Pattern.matches(regex.toLowerCase(), destStr.toLowerCase());
108+
if (isNot) {
109+
return !isMatch;
110+
}
111+
return isMatch;
112+
} catch (Exception e) {
113+
log.error(e.getMessage(), e);
114+
return false;
115+
}
116+
}
117+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.inlong.sdk.transform.process.parser;
19+
20+
import org.apache.inlong.sdk.transform.decode.SourceData;
21+
import org.apache.inlong.sdk.transform.process.Context;
22+
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
23+
24+
import net.sf.jsqlparser.expression.ArrayExpression;
25+
26+
import java.util.List;
27+
28+
/**
29+
* ArrayParser
30+
* Description: Support to get the value from array
31+
*/
32+
@TransformParser(values = ArrayExpression.class)
33+
public class ArrayParser implements ValueParser {
34+
35+
private final ValueParser left;
36+
37+
private final ValueParser right;
38+
39+
public ArrayParser(ArrayExpression expr) {
40+
this.left = OperatorTools.buildParser(expr.getObjExpression());
41+
this.right = OperatorTools.buildParser(expr.getIndexExpression());
42+
}
43+
44+
@Override
45+
public Object parse(SourceData sourceData, int rowIndex, Context context) {
46+
Object leftValue = this.left.parse(sourceData, rowIndex, context);
47+
Object rightValue = this.right.parse(sourceData, rowIndex, context);
48+
49+
if (leftValue instanceof List<?> && rightValue instanceof Number) {
50+
List<?> leftObj = (List<?>) leftValue;
51+
Number rightObj = (Number) rightValue;
52+
return leftObj.get(rightObj.intValue());
53+
}
54+
return null;
55+
}
56+
}

0 commit comments

Comments
 (0)