Skip to content

Commit 2756ce4

Browse files
author
Kyle Dong
committed
Added ENHANCED_SPLIT_INDEX function
1 parent 478ca3d commit 2756ce4

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.tencent.cloud.oceanus.udf;
2+
3+
import org.apache.flink.table.functions.ScalarFunction;
4+
import org.slf4j.Logger;
5+
import org.slf4j.LoggerFactory;
6+
7+
import java.util.regex.Pattern;
8+
9+
/**
10+
* 增强版的 ENHANCED_SPLIT_INDEX, 对连续的分隔符会如实处理, 例如输入 "1,2,,3" 会从 ["1","2","","3"] 中取字段
11+
* Flink 自带的 SPLIT_INDEX 函数会把多个分隔符当作一个, 例如输入 "1,2,,3" 会从 ["1","2","3"] 中取字段
12+
*
13+
* SQL 代码声明方式:
14+
* CREATE TEMPORARY SYSTEM FUNCTION ENHANCED_SPLIT_INDEX AS 'com.tencent.cloud.oceanus.udf.EnhancedSplitIndex' LANGUAGE JAVA;
15+
*/
16+
public class EnhancedSplitIndex extends ScalarFunction {
17+
private static final Logger LOGGER = LoggerFactory.getLogger(EnhancedSplitIndex.class);
18+
19+
public String eval(String input, String separator, int index) {
20+
String[] splits = input.split(Pattern.quote(separator));
21+
22+
if (index >= splits.length || index < 0) {
23+
return null;
24+
}
25+
return splits[index];
26+
}
27+
28+
public static void main(String[] args) {
29+
EnhancedSplitIndex instance = new EnhancedSplitIndex();
30+
System.out.println(instance.eval("a|b|c|d|e|f|0|||||||1|0|2|4", "|", 0));
31+
}
32+
}

0 commit comments

Comments
 (0)