Skip to content

Commit

Permalink
[Bug](udf) forbid varchar type and convert to use string type in java…
Browse files Browse the repository at this point in the history
…-udf (apache#38409)

## Proposed changes
This commit makes two changes:
1. Forbid creating a UDF with the varchar type; the string type should be used instead.
2. For compatibility with older versions, convert varchar to the string
type when executing a UDF.

<!--Describe your changes.-->
  • Loading branch information
zhangstar333 authored Aug 23, 2024
1 parent bd8df44 commit ab359e2
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ public class JavaUdfDataType {
public static final JavaUdfDataType BIGINT = new JavaUdfDataType("BIGINT", TPrimitiveType.BIGINT, 8);
public static final JavaUdfDataType FLOAT = new JavaUdfDataType("FLOAT", TPrimitiveType.FLOAT, 4);
public static final JavaUdfDataType DOUBLE = new JavaUdfDataType("DOUBLE", TPrimitiveType.DOUBLE, 8);
public static final JavaUdfDataType CHAR = new JavaUdfDataType("CHAR", TPrimitiveType.CHAR, 0);
public static final JavaUdfDataType VARCHAR = new JavaUdfDataType("VARCHAR", TPrimitiveType.VARCHAR, 0);
public static final JavaUdfDataType STRING = new JavaUdfDataType("STRING", TPrimitiveType.STRING, 0);
public static final JavaUdfDataType DATE = new JavaUdfDataType("DATE", TPrimitiveType.DATE, 8);
public static final JavaUdfDataType DATETIME = new JavaUdfDataType("DATETIME", TPrimitiveType.DATETIME, 8);
Expand Down Expand Up @@ -72,8 +70,6 @@ public class JavaUdfDataType {
JavaUdfDataTypeSet.add(BIGINT);
JavaUdfDataTypeSet.add(FLOAT);
JavaUdfDataTypeSet.add(DOUBLE);
JavaUdfDataTypeSet.add(CHAR);
JavaUdfDataTypeSet.add(VARCHAR);
JavaUdfDataTypeSet.add(STRING);
JavaUdfDataTypeSet.add(DATE);
JavaUdfDataTypeSet.add(DATETIME);
Expand Down Expand Up @@ -142,7 +138,9 @@ public static Set<JavaUdfDataType> getCandidateTypes(Class<?> c) {
} else if (c == double.class || c == Double.class) {
return Sets.newHashSet(JavaUdfDataType.DOUBLE);
} else if (c == char.class || c == Character.class) {
return Sets.newHashSet(JavaUdfDataType.CHAR);
// Some users have created UDFs that declare a varchar parameter instead of a
// string one while the evaluate method uses the String class, so the TPrimitiveType is set to STRING.
return Sets.newHashSet(JavaUdfDataType.STRING);
} else if (c == String.class) {
return Sets.newHashSet(JavaUdfDataType.STRING);
} else if (Type.DATE_SUPPORTED_JAVA_TYPE.contains(c)) {
Expand Down Expand Up @@ -171,6 +169,10 @@ public static boolean isSupported(Type t) {
return true;
}
}
if (t.getPrimitiveType().toThrift() == TPrimitiveType.VARCHAR
|| t.getPrimitiveType().toThrift() == TPrimitiveType.CHAR) {
return true;
}
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Pair;
import org.apache.doris.common.exception.InternalException;
import org.apache.doris.thrift.TPrimitiveType;

import org.apache.log4j.Logger;
import sun.misc.Unsafe;
Expand Down Expand Up @@ -112,8 +113,11 @@ public static Pair<Boolean, JavaUdfDataType> setReturnType(Type retType, Class<?
// Check if the evaluate method return type is compatible with the return type from
// the function definition. This happens when both of them map to the same primitive
// type.
Object[] res = javaTypes.stream().filter(
t -> t.getPrimitiveType() == retType.getPrimitiveType().toThrift()).toArray();
Object[] res = javaTypes.stream().filter(t -> {
TPrimitiveType t1 = t.getPrimitiveType();
TPrimitiveType ret = retType.getPrimitiveType().toThrift();
return (t1 == ret) || (t1 == TPrimitiveType.STRING && ret == TPrimitiveType.VARCHAR);
}).toArray();

JavaUdfDataType result = new JavaUdfDataType(
res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0]);
Expand Down Expand Up @@ -160,8 +164,11 @@ public static Pair<Boolean, JavaUdfDataType[]> setArgTypes(Type[] parameterTypes
for (int i = 0; i < parameterTypes.length; ++i) {
Set<JavaUdfDataType> javaTypes = JavaUdfDataType.getCandidateTypes(udfArgTypes[i + firstPos]);
int finalI = i;
Object[] res = javaTypes.stream().filter(
t -> t.getPrimitiveType() == parameterTypes[finalI].getPrimitiveType().toThrift()).toArray();
Object[] res = javaTypes.stream().filter(t -> {
TPrimitiveType t1 = t.getPrimitiveType();
TPrimitiveType param = parameterTypes[finalI].getPrimitiveType().toThrift();
return (t1 == param) || (t1 == TPrimitiveType.STRING && param == TPrimitiveType.VARCHAR);
}).toArray();
inputArgTypes[i] = new JavaUdfDataType(
res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0]);
if (parameterTypes[finalI].isDecimalV3() || parameterTypes[finalI].isDatetimeV2()) {
Expand Down
2 changes: 0 additions & 2 deletions fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,6 @@ public abstract class Type {
.put(PrimitiveType.DOUBLE, Sets.newHashSet(Double.class, double.class))
.put(PrimitiveType.BIGINT, Sets.newHashSet(Long.class, long.class))
.put(PrimitiveType.IPV4, Sets.newHashSet(Integer.class, int.class))
.put(PrimitiveType.CHAR, Sets.newHashSet(String.class))
.put(PrimitiveType.VARCHAR, Sets.newHashSet(String.class))
.put(PrimitiveType.STRING, Sets.newHashSet(String.class))
.put(PrimitiveType.DATE, DATE_SUPPORTED_JAVA_TYPE)
.put(PrimitiveType.DATEV2, DATE_SUPPORTED_JAVA_TYPE)
Expand Down
16 changes: 16 additions & 0 deletions regression-test/suites/javaudf_p0/test_javaudf_string.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ suite("test_javaudf_string") {
test_javaudf_string
JOIN test_javaudf_string_2 ON test_javaudf_string.user_id = test_javaudf_string_2.user_id order by 1,2;
"""
// Negative case: creating the Java UDF with a varchar argument type is
// expected to fail; the expected exception text is given below.
test {
sql """ CREATE FUNCTION java_udf_string_test(varchar, int, int) RETURNS string PROPERTIES (
"file"="file://${jarPath}",
"symbol"="org.apache.doris.udf.StringTest",
"type"="JAVA_UDF"
); """
exception "does not support type"
}
// Negative case: creating the Java UDF with a varchar return type is
// expected to fail; the expected exception text is given below.
test {
sql """ CREATE FUNCTION java_udf_string_test(string, int, int) RETURNS varchar PROPERTIES (
"file"="file://${jarPath}",
"symbol"="org.apache.doris.udf.StringTest",
"type"="JAVA_UDF"
); """
exception "does not support type"
}
} finally {
try_sql("DROP FUNCTION IF EXISTS java_udf_string_test(string, int, int);")
try_sql("DROP TABLE IF EXISTS ${tableName}")
Expand Down

0 comments on commit ab359e2

Please sign in to comment.