Skip to content

Commit 9a79697

Browse files
authored
[INLONG-12019][SDK] Transformation supports a caching mechanism for processing identical function parameters (#12020)
1 parent cd3d44a commit 9a79697

File tree

5 files changed

+114
-9
lines changed

5 files changed

+114
-9
lines changed

inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/Context.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,7 @@ public Long getLong(String key) {
8989
return null;
9090
}
9191

92+
/**
 * Exposes the {@code runtimeParams} map of this context.
 *
 * @return the {@code runtimeParams} reference itself; no copy is made here, so changes made by the
 *         caller are made to the same map
 */
public Map<String, Object> getRuntimeParams() {
93+
return runtimeParams;
94+
}
9295
}

inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/TransformProcessor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ public List<O> transform(I input, Map<String, Object> extParams) {
180180
sinkData.addField(fieldName, "");
181181
} else {
182182
sinkData.addField(fieldName, fieldValue.toString());
183+
context.put(fieldName, fieldValue);
183184
}
184185
} catch (Throwable t) {
185186
sinkData.addField(fieldName, "");

inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/ParseUrlFunction.java

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,31 +57,40 @@ public class ParseUrlFunction implements ValueParser {
5757
private ValueParser urlParser;
5858
private ValueParser partParser;
5959
private ValueParser keyParser;
60+
private final String exprKey;
6061

6162
/**
 * Builds one parser per function argument and records the text of the expression.
 *
 * @param expr the function expression; parameter 0 feeds {@code urlParser}, the optional
 *        parameter 1 feeds {@code partParser} and the optional parameter 2 feeds {@code keyParser}
 *        (a missing optional parameter leaves the corresponding parser {@code null})
 */
public ParseUrlFunction(Function expr) {
6263
List<Expression> params = expr.getParameters().getExpressions();
6364
urlParser = OperatorTools.buildParser(params.get(0));
6465
partParser = params.size() > 1 ? OperatorTools.buildParser(params.get(1)) : null;
6566
keyParser = params.size() > 2 ? OperatorTools.buildParser(params.get(2)) : null;
67+
exprKey = expr.toString(); // parse() uses this text as the key into context.getRuntimeParams()
6668
}
6769

6870
@Override
6971
public Object parse(SourceData sourceData, int rowIndex, Context context) {
72+
Map<String, Object> runtimeParams = context.getRuntimeParams();
73+
if (runtimeParams.containsKey(exprKey)) {
74+
return runtimeParams.get(exprKey);
75+
}
7076
if (urlParser == null || partParser == null) {
77+
runtimeParams.put(exprKey, null);
7178
return null;
7279
}
7380
Object urlObj = urlParser.parse(sourceData, rowIndex, context);
7481
Object partObj = partParser.parse(sourceData, rowIndex, context);
7582
Object keyObj = keyParser != null ? keyParser.parse(sourceData, rowIndex, context) : null;
7683

7784
if (urlObj == null || partObj == null) {
85+
runtimeParams.put(exprKey, null);
7886
return null;
7987
}
8088

8189
String url = OperatorTools.parseString(urlObj);
8290
String part = OperatorTools.parseString(partObj);
8391
String key = keyObj != null ? OperatorTools.parseString(keyObj) : null;
8492
if (keyParser != null && key == null) {
93+
runtimeParams.put(exprKey, null);
8594
return null;
8695
}
8796

@@ -95,6 +104,7 @@ public Object parse(SourceData sourceData, int rowIndex, Context context) {
95104
}
96105
Map<String, String> queryPairs = splitQuery(strQuery);
97106
if (key == null) {
107+
runtimeParams.put(exprKey, strQuery);
98108
return strQuery;
99109
}
100110
return queryPairs.getOrDefault(key, "");
@@ -103,23 +113,39 @@ public Object parse(SourceData sourceData, int rowIndex, Context context) {
103113
URL netUrl = new URL(url);
104114
switch (part) {
105115
case "HOST":
106-
return netUrl.getHost();
116+
String exprValue = netUrl.getHost();
117+
runtimeParams.put(exprKey, exprValue);
118+
return exprValue;
107119
case "PATH":
108-
return netUrl.getPath();
120+
exprValue = netUrl.getPath();
121+
runtimeParams.put(exprKey, exprValue);
122+
return exprValue;
109123
case "REF":
110-
return netUrl.getRef();
124+
exprValue = netUrl.getRef();
125+
runtimeParams.put(exprKey, exprValue);
126+
return exprValue;
111127
case "PROTOCOL":
112-
return netUrl.getProtocol();
128+
exprValue = netUrl.getProtocol();
129+
runtimeParams.put(exprKey, exprValue);
130+
return exprValue;
113131
case "AUTHORITY":
114-
return netUrl.getAuthority();
132+
exprValue = netUrl.getAuthority();
133+
runtimeParams.put(exprKey, exprValue);
134+
return exprValue;
115135
case "FILE":
116-
return netUrl.getFile();
136+
exprValue = netUrl.getFile();
137+
runtimeParams.put(exprKey, exprValue);
138+
return exprValue;
117139
case "USERINFO":
118-
return netUrl.getUserInfo();
140+
exprValue = netUrl.getUserInfo();
141+
runtimeParams.put(exprKey, exprValue);
142+
return exprValue;
119143
default:
144+
runtimeParams.put(exprKey, null);
120145
return null;
121146
}
122147
} catch (MalformedURLException e) {
148+
runtimeParams.put(exprKey, null);
123149
return null;
124150
}
125151
}

inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.net.URLDecoder;
3131
import java.nio.charset.StandardCharsets;
3232
import java.util.List;
33+
import java.util.Map;
3334

3435
/**
3536
* UrlDecodeFunction -> url_decode(str[, charset])
@@ -53,39 +54,54 @@ public class UrlDecodeFunction implements ValueParser {
5354

5455
private final ValueParser stringParser;
5556
private final ValueParser charsetParser;
57+
private final String exprKey;
5658

5759
/**
 * Builds one parser per function argument and records the text of the expression.
 *
 * @param expr the function expression; parameter 0 feeds {@code stringParser} and the optional
 *        parameter 1 feeds {@code charsetParser} ({@code null} when only one parameter is given)
 */
public UrlDecodeFunction(Function expr) {
5860
List<Expression> params = expr.getParameters().getExpressions();
5961
stringParser = OperatorTools.buildParser(params.get(0));
6062
charsetParser = params.size() > 1 ? OperatorTools.buildParser(params.get(1)) : null;
63+
exprKey = expr.toString(); // parse() uses this text as the key into context.getRuntimeParams()
6164
}
6265

6366
/**
 * URL-decodes the value of the first argument, with UTF-8 when no charset argument was configured
 * and with the charset named by the second argument otherwise. The result is stored in the
 * context's runtime params under {@code exprKey}, and an existing entry for {@code exprKey} is
 * returned without evaluating the arguments again.
 *
 * NOTE(review): the lookup key is only the expression text; {@code rowIndex} is not part of it.
 * If one context is used for several row indexes, later rows would get the first row's result.
 * Confirm against the caller that the runtime params are not shared across rows.
 *
 * @param sourceData passed through to the argument parsers
 * @param rowIndex passed through to the argument parsers
 * @param context supplies the runtime params map that holds previously computed results
 * @return the decoded string, or {@code null} when an argument evaluates or converts to
 *         {@code null} or when decoding throws
 */
@Override
6467
public Object parse(SourceData sourceData, int rowIndex, Context context) {
68+
Map<String, Object> runtimeParams = context.getRuntimeParams();
69+
// containsKey rather than a null check on get(): null results are stored below, so a stored
// null is returned as a hit too (assumes the map accepts null values - TODO confirm its type).
if (runtimeParams.containsKey(exprKey)) {
70+
return runtimeParams.get(exprKey);
71+
}
6572
Object stringObj = stringParser.parse(sourceData, rowIndex, context);
6673
if (stringObj == null) {
74+
runtimeParams.put(exprKey, null);
6775
return null;
6876
}
6977
String string = OperatorTools.parseString(stringObj);
7078
if (string == null) {
79+
runtimeParams.put(exprKey, null);
7180
return null;
7281
}
7382

7483
try {
7584
if (charsetParser == null) {
76-
return URLDecoder.decode(string, StandardCharsets.UTF_8.toString());
85+
String exprValue = URLDecoder.decode(string, StandardCharsets.UTF_8.toString());
86+
runtimeParams.put(exprKey, exprValue);
87+
return exprValue;
7788
} else {
7889
Object charsetObj = charsetParser.parse(sourceData, rowIndex, context);
7990
if (charsetObj == null) {
91+
runtimeParams.put(exprKey, null);
8092
return null;
8193
}
8294
String charset = OperatorTools.parseString(charsetObj);
8395
if (charset == null) {
96+
runtimeParams.put(exprKey, null);
8497
return null;
8598
}
86-
return URLDecoder.decode(string, charset);
99+
String exprValue = URLDecoder.decode(string, charset);
100+
runtimeParams.put(exprKey, exprValue);
101+
return exprValue;
87102
}
88103
} catch (Exception e) {
104+
// Any failure while decoding (for example an unsupported charset name) is stored as null as well.
runtimeParams.put(exprKey, null);
89105
return null;
90106
}
91107
}

inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/processor/TestCsv2KvProcessor.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,63 @@ public void testCsv2CsvSplit() throws Exception {
139139
Assert.assertEquals(output1.get(0),
140140
"20250101|2025-01-01 01:01:01.001|dt_imp|2025-01-01 01:01:01.001|12345678|123456|android|PJV110;Android 15,level 35|15|OPPO|PJV110|china|guangdong|shenzhen|wifi|12345678|1.2.0.12345|mobileapp|12345678|pg_sgrp_test||search|||MNJT|{\"A88\":\"12345678\",\"A89\":\"12345678\",\"A48\":\"\",\"dt_wxopenid\":\"\",\"dt_seqtime\":\"12345678\",\"app_bld\":\"12345678\",\"A100\":\"12345678\",\"dt_fchlid\":\"\",\"A1\":\"12345678\",\"os_vrsn\":\"Android 15\",\"A3\":\"12345678\",\"dt_mchlid\":\"\",\"dt_usstmp\":\"12345678\",\"client_process_name\":\"com.tencent.mobileqq\",\"dt_guid\":\"12345678\",\"A8\":\"12345678\",\"dt_callfrom\":\"0\",\"A9\":\"OPPO\",\"dt_qq\":\"12345678\",\"tianshu_id\":\"\",\"dt_element_params\":\"[{\\\\\"eid\\\\\":\\\\\"search\\\\\"}]\",\"dt_ussn\":\"12345678\",\"dt_ele_reuse_id\":\"\",\"A95\":\"1.2.0.12345\",\"A52\":\"480\",\"dt_qqopenid\":\"\",\"A10\":\"PJV110\",\"A99\":\"N\",\"A12\":\"zh\",\"dt_ele_scroll_flag\":\"0\",\"dt_eid\":\"search\",\"dt_seqid\":\"1480\",\"A58\":\"N\",\"param_is_gray_version\":\"false\",\"A17\":\"1080*2244\",\"A19\":\"wifi\",\"A159\":\"N\",\"red_pot\":\"0\",\"dt_ele_is_first_scroll_imp\":\"0\",\"dt_wbopenid\":\"\",\"A157\":\"1.2.0.12345\",\"A158\":\"12345678\",\"A156\":\"N\",\"A153\":\"123456\",\"qq_appid\":\"12345678\",\"dt_wxunionid\":\"\",\"ui_vrsn\":\"PJV(CN01)\",\"dt_sdkversion\":\"2445\",\"dt_coldstart\":\"0\",\"A67\":\"mobileapp\",\"A23\":\"12345678\",\"client_page_name\":\"page\",\"dt_starttype\":\"1\",\"dt123456\":\"0\",\"callfrom_type\":\"0\",\"A160\":\"shenzhen\",\"dt_tid\":\"\",\"dt_usid\":\"12345678\",\"A72\":\"1.2.3.4\",\"param_patch_version\":\"0\",\"A31\":\",,\",\"A76\":\"1.2.3.4\",\"dt_omgbzid\":\"\",\"A34\":\"12345678\",\"dt_mainlogin\":\"\",\"os\":\"1\",\"message_box\":\"{ \\\\t\\\\\"message_unread\\\\\": 0, \\\\t\\\\\"other_unread\\\\\": 0, \\\\t\\\\\"validation_message_unread\\\\\": 0, \\\\t\\\\\"1\\\\\": 
0}\",\"dt_protoversion\":\"1\",\"dt_callschema\":\"1\",\"dt_simtype\":\"3\",\"dt_pgid\":\"pg_sgrp_test\",\"dt_oaid\":\"\",\"dt_adcode\":\"\",\"dt_accountid\":\"12345678\",\"app_vr\":\"1.2.3\"}|{\"eid\":\"search\",\"cur_pg\":{}}|1|1200");
141141
}
142+
143+
/**
 * Runs one CSV record through a transform SQL in which later select items refer to earlier
 * aliases through {@code $ctx.<alias>} (decode_event_value, decode_hardvalue_os,
 * lower_hardvalue_os), and in which the same {@code url_decode(event_value,'GBK')} expression
 * appears in both the select list and the where clause. Checks that exactly one output line is
 * produced and that it equals the expected CSV text.
 */
@Test
144+
public void testCsv2CsvRuntimesMap() throws Exception {
145+
List<FieldInfo> sourceFields = this.getTestFieldList("ftime", "extinfo", "country", "province", "operator",
146+
"apn", "gw", "src_ip_head", "info_str", "product_id", "app_version", "sdk_id", "sdk_version",
147+
"hardware_os", "qua", "upload_ip", "client_ip", "upload_apn", "event_code", "event_result",
148+
"package_size", "consume_time", "event_value", "event_time", "upload_time");
149+
List<FieldInfo> sinkFields = this.getTestFieldList("imp_hour", "ftime", "event_code", "event_time", "log_id",
150+
"qimei36", "platform", "hardware_os", "os_version", "brand", "model", "country", "province", "city",
151+
"network_type", "dt_qq", "app_version", "boundle_id", "dt_usid", "dt_pgid", "dt_ref_pgid", "dt_eid",
152+
"dt_element_lvtm", "dt_lvtm", "product_id", "biz_pub_params", "udf_kv", "sdk_type", "app_version_num");
153+
CsvSourceInfo csvSource = new CsvSourceInfo("UTF-8", '|', '\\', sourceFields);
154+
CsvSinkInfo csvSink = new CsvSinkInfo("UTF-8", '|', '\\', sinkFields);
155+
String transformSql = "select replace(substr(ftime,1,10),'-','') as imp_hour,"
156+
+ "url_decode(event_value,'GBK') as decode_event_value,"
157+
+ "url_decode(hardware_os,'GBK') as decode_hardvalue_os,"
158+
+ "lower($ctx.decode_hardvalue_os) as lower_hardvalue_os,"
159+
+ "ftime as ftime,event_code as event_code,"
160+
+ "event_time as event_time,"
161+
+ "parse_url($ctx.decode_event_value,'QUERY','A100') as log_id,"
162+
+ "parse_url($ctx.decode_event_value,'QUERY','A153') as qimei36,"
163+
+ "case when $ctx.lower_hardvalue_os like '%android%' then 'android' when $ctx.lower_hardvalue_os like '%ipad%' then 'ipad' when $ctx.lower_hardvalue_os like '%iphone%' then 'iphone' when $ctx.lower_hardvalue_os like '%harmony%' then 'harmony' when $ctx.lower_hardvalue_os like '%windows%' then 'windows' when $ctx.lower_hardvalue_os like '%mac%' then 'mac' when $ctx.lower_hardvalue_os like '%linux%' then 'linux' else 'unknown' end as platform,"
164+
+ "$ctx.decode_hardvalue_os as hardware_os,"
165+
+ "trim(case when hardware_os LIKE '%Android%' then regexp_extract($ctx.decode_hardvalue_os, 'Android(.+),level', 1) when hardware_os LIKE '%iPhone%' then regexp_extract($ctx.decode_hardvalue_os, 'OS(.+)\\\\(', 1) when hardware_os LIKE '%Harmony%' then regexp_extract($ctx.decode_hardvalue_os, 'Harmony\\\\s+[^\\\\s]+\\\\s+([^\\\\s]+)\\\\(', 1) else 'unknown' end) as os_version,"
166+
+ "parse_url($ctx.decode_event_value,'QUERY','A9') as brand,"
167+
+ "parse_url($ctx.decode_event_value,'QUERY','A10') as model,"
168+
+ "country as country,"
169+
+ "province as province,"
170+
+ "parse_url($ctx.decode_event_value,'QUERY','A160') as city,"
171+
+ "parse_url($ctx.decode_event_value,'QUERY','A19') as network_type,"
172+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_qq') as dt_qq,"
173+
+ "url_decode(app_version,'GBK') as app_version,"
174+
+ "parse_url($ctx.decode_event_value,'QUERY','A67') as boundle_id,"
175+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_usid') as dt_usid,"
176+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_pgid') as dt_pgid,"
177+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_ref_pgid') as dt_ref_pgid,"
178+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_eid') as dt_eid,"
179+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_element_lvtm') as dt_element_lvtm,"
180+
+ "parse_url($ctx.decode_event_value,'QUERY','dt_lvtm') as dt_lvtm,"
181+
+ "product_id as product_id,"
182+
+ "json_remove(str_to_json($ctx.decode_event_value,'&','='),'udf_kv') as biz_pub_params,"
183+
+ "parse_url($ctx.decode_event_value,'QUERY','udf_kv') as udf_kv,"
184+
+ "case when sdk_id='js' then 1 when sdk_id='weapp' then 2 else 0 end as sdk_type,"
185+
+ "split_index(app_version,'\\.',0)*1000+split_index(app_version,'\\.',1)*100+split_index(split_index(app_version,'\\.',2),'\\(',0) as app_version_num "
186+
+ "from source where parse_url(url_decode(event_value,'GBK'),'QUERY','dt_pgid') like 'pg_sgrp_%'";
187+
System.out.println(transformSql);
188+
TransformConfig config = new TransformConfig(transformSql, new HashMap<>(), false, true);
189+
// case1
190+
TransformProcessor<String, String> processor1 = TransformProcessor
191+
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
192+
SinkEncoderFactory.createCsvEncoder(csvSink));
193+
String sourceData =
194+
"2025-01-01 01:01:01.001|extinfo=127.0.0.1|china|guangdong|unite|unknown|unknown|127.0.0.1 2025-01-01 01:01:01.001|INFO|MNJT|1.2.0.12345|js|1.2.3.4-qqvideo6|PJV110%3BAndroid+15%2Clevel+35||127.0.0.1|127.0.0.1|wifi|dt_imp|true|0|0|A9%3DOPPO%26A89%3D12345678%26A76%3D1.2.3.4%26A58%3DN%26A52%3D480%26A17%3D1080*2244%26A12%3Dzh%26A10%3DPJV110%26A158%3D12345678%26A67%3Dmobileapp%26A159%3DN%26A31%3D%2C%2C%26A160%3Dshenzhen%26ui_vrsn%3DPJV%28CN01%29%26udf_kv%3D%7B%22eid%22%3A%22search%22%2C%22cur_pg%22%3A%7B%7D%7D%26tianshu_id%3D%26red_pot%3D0%26param_patch_version%3D0%26message_box%3D%7B+%09%22message_unread%22%3A+0%2C+%09%22other_unread%22%3A+0%2C+%09%22validation_message_unread%22%3A+0%2C+%09%221%22%3A+0%7D%26dt_wxunionid%3D%26dt_wxopenid%3D%26param_is_gray_version%3Dfalse%26dt_usstmp%3D12345678%26dt_ussn%3D12345678%26dt_tid%3D%26dt_simtype%3D3%26os_vrsn%3DAndroid+15%26dt_seqid%3D1480%26dt_sdkversion%3D2445%26dt_qqopenid%3D%26dt_qq%3D12345678%26dt_usid%3D12345678%26dt_protoversion%3D1%26A99%3DN%26callfrom_type%3D0%26dt_ele_reuse_id%3D%26dt_omgbzid%3D%26dt_ele_scroll_flag%3D0%26dt_element_params%3D%5B%7B%22eid%22%3A%22search%22%7D%5D%26app_bld%3D12345678%26dt_ele_is_first_scroll_imp%3D0%26A88%3D12345678%26A48%3D%26A95%3D1.2.0.12345%26A19%3Dwifi%26A3%3D12345678%26dt_seqtime%3D12345678%26dt_pgid%3Dpg_sgrp_test%26dt_adcode%3D%26dt_oaid%3D%26qq_appid%3D12345678%26dt_starttype%3D1%26A100%3D12345678%26dt_wbopenid%3D%26A23%3D12345678%26A156%3DN%26A72%3D1.2.3.4%26A157%3D1.2.0.12345%26dt_mainlogin%3D%26A34%3D12345678%26A153%3D123456%26dt_coldstart%3D0%26app_vr%3D1.2.3%26A8%3D12345678%26client_page_name%3Dpage%26dt123456%3D0%26dt_mchlid%3D%26client_process_name%3Dcom.tencent.mobileqq%26os%3D1%26dt_accountid%3D12345678%26dt_callfrom%3D0%26dt_eid%3Dsearch%26dt_guid%3D12345678%26A1%3D12345678%26dt_callschema%3D1%26dt_fchlid%3D|2025-01-01 01:01:01.001|2025-08-07 16:39:26";
195+
List<String> output1 = processor1.transform(sourceData, new HashMap<>());
196+
Assert.assertEquals(1, output1.size());
197+
System.out.println(output1.get(0));
198+
// NOTE(review): if Assert is org.junit.Assert, assertEquals takes (expected, actual); here the
// actual output is passed first, which only swaps the two values in a failure message.
Assert.assertEquals(output1.get(0),
199+
"20250101|2025-01-01 01:01:01.001|dt_imp|2025-01-01 01:01:01.001|12345678|123456|android|PJV110;Android 15,level 35|15|OPPO|PJV110|china|guangdong|shenzhen|wifi|12345678|1.2.0.12345|mobileapp|12345678|pg_sgrp_test||search|||MNJT|{\"A88\":\"12345678\",\"A89\":\"12345678\",\"A48\":\"\",\"dt_wxopenid\":\"\",\"dt_seqtime\":\"12345678\",\"app_bld\":\"12345678\",\"A100\":\"12345678\",\"dt_fchlid\":\"\",\"A1\":\"12345678\",\"os_vrsn\":\"Android 15\",\"A3\":\"12345678\",\"dt_mchlid\":\"\",\"dt_usstmp\":\"12345678\",\"client_process_name\":\"com.tencent.mobileqq\",\"dt_guid\":\"12345678\",\"A8\":\"12345678\",\"dt_callfrom\":\"0\",\"A9\":\"OPPO\",\"dt_qq\":\"12345678\",\"tianshu_id\":\"\",\"dt_element_params\":\"[{\\\\\"eid\\\\\":\\\\\"search\\\\\"}]\",\"dt_ussn\":\"12345678\",\"dt_ele_reuse_id\":\"\",\"A95\":\"1.2.0.12345\",\"A52\":\"480\",\"dt_qqopenid\":\"\",\"A10\":\"PJV110\",\"A99\":\"N\",\"A12\":\"zh\",\"dt_ele_scroll_flag\":\"0\",\"dt_eid\":\"search\",\"dt_seqid\":\"1480\",\"A58\":\"N\",\"param_is_gray_version\":\"false\",\"A17\":\"1080*2244\",\"A19\":\"wifi\",\"A159\":\"N\",\"red_pot\":\"0\",\"dt_ele_is_first_scroll_imp\":\"0\",\"dt_wbopenid\":\"\",\"A157\":\"1.2.0.12345\",\"A158\":\"12345678\",\"A156\":\"N\",\"A153\":\"123456\",\"qq_appid\":\"12345678\",\"dt_wxunionid\":\"\",\"ui_vrsn\":\"PJV(CN01)\",\"dt_sdkversion\":\"2445\",\"dt_coldstart\":\"0\",\"A67\":\"mobileapp\",\"A23\":\"12345678\",\"client_page_name\":\"page\",\"dt_starttype\":\"1\",\"dt123456\":\"0\",\"callfrom_type\":\"0\",\"A160\":\"shenzhen\",\"dt_tid\":\"\",\"dt_usid\":\"12345678\",\"A72\":\"1.2.3.4\",\"param_patch_version\":\"0\",\"A31\":\",,\",\"A76\":\"1.2.3.4\",\"dt_omgbzid\":\"\",\"A34\":\"12345678\",\"dt_mainlogin\":\"\",\"os\":\"1\",\"message_box\":\"{ \\\\t\\\\\"message_unread\\\\\": 0, \\\\t\\\\\"other_unread\\\\\": 0, \\\\t\\\\\"validation_message_unread\\\\\": 0, \\\\t\\\\\"1\\\\\": 
0}\",\"dt_protoversion\":\"1\",\"dt_callschema\":\"1\",\"dt_simtype\":\"3\",\"dt_pgid\":\"pg_sgrp_test\",\"dt_oaid\":\"\",\"dt_adcode\":\"\",\"dt_accountid\":\"12345678\",\"app_vr\":\"1.2.3\"}|{\"eid\":\"search\",\"cur_pg\":{}}|1|1200");
200+
}
142201
}

0 commit comments

Comments
 (0)