Skip to content

Commit 6e8d53c

Browse files
committed
More exhaustive athena types
1 parent abe92af commit 6e8d53c

1 file changed

Lines changed: 41 additions & 3 deletions

File tree

sdk/python/feast/type_map.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import decimal
1616
import json
1717
import logging
18+
import re
1819
import uuid as uuid_module
1920
from collections import defaultdict
2021
from datetime import datetime, timezone
@@ -2097,25 +2098,62 @@ def pg_type_code_to_arrow(code: int) -> str:
20972098

20982099
def athena_to_feast_value_type(athena_type_as_str: str) -> ValueType:
    """Map an Athena column type string to the matching Feast ``ValueType``.

    Matching is case-insensitive and ignores surrounding whitespace. Any
    parameter suffix is dropped before the lookup, so ``decimal(10,2)`` and
    ``varchar(255)`` resolve like ``decimal`` and ``varchar``.

    Array types are resolved recursively from their element type. The
    element type may be delimited as ``array<T>``, ``array[T]`` or
    ``array(T)``.

    Args:
        athena_type_as_str: Athena type name, optionally parameterized,
            e.g. ``"bigint"``, ``"decimal(10,2)"`` or ``"array<string>"``.

    Returns:
        The corresponding ``ValueType``. Unrecognized scalar types yield
        ``ValueType.UNKNOWN``. Arrays whose element type is missing or has
        no dedicated list type yield ``ValueType.VALUE_LIST``.
    """
    # Type names from https://docs.aws.amazon.com/athena/latest/ug/data-types.html
    athena_type = athena_type_as_str.lower().strip()

    if athena_type.startswith("array"):
        # The greedy group spans from the first opening delimiter to the last
        # closing one, so nested element types such as "array<int>" or
        # "decimal(10,2)" are captured whole.
        inner_type_match = re.search(r"[<\[(](.+)[>\])]", athena_type)
        if inner_type_match is None:
            # Bare "array" without an element type.
            return ValueType.VALUE_LIST

        # The recursive call normalizes case and whitespace itself.
        inner_feast_type = athena_to_feast_value_type(inner_type_match.group(1))

        list_mapping = {
            ValueType.BYTES: ValueType.BYTES_LIST,
            ValueType.STRING: ValueType.STRING_LIST,
            ValueType.INT32: ValueType.INT32_LIST,
            ValueType.INT64: ValueType.INT64_LIST,
            ValueType.DOUBLE: ValueType.DOUBLE_LIST,
            ValueType.FLOAT: ValueType.FLOAT_LIST,
            ValueType.BOOL: ValueType.BOOL_LIST,
            ValueType.UNIX_TIMESTAMP: ValueType.UNIX_TIMESTAMP_LIST,
            ValueType.MAP: ValueType.MAP_LIST,
            ValueType.JSON: ValueType.JSON_LIST,
            ValueType.STRUCT: ValueType.STRUCT_LIST,
            ValueType.UUID: ValueType.UUID_LIST,
            ValueType.DECIMAL: ValueType.DECIMAL_LIST,
        }
        # Element types without a dedicated list type (e.g. nested arrays)
        # fall back to the generic list type.
        return list_mapping.get(inner_feast_type, ValueType.VALUE_LIST)

    # Drop any parameter suffix: "decimal(10,2)" -> "decimal",
    # "map<string,int>" -> "map".
    base_type = re.split(r"[(<\[]", athena_type, maxsplit=1)[0].strip()

    # Substring match so variants such as "timestamp with time zone" are
    # covered too; "timestamp" itself already contains "time".
    if "time" in base_type or "date" in base_type:
        return ValueType.UNIX_TIMESTAMP

    type_map = {
        "null": ValueType.NULL,
        "boolean": ValueType.BOOL,
        "tinyint": ValueType.INT32,
        "smallint": ValueType.INT32,
        "int": ValueType.INT32,
        "integer": ValueType.INT32,
        "bigint": ValueType.INT64,
        "double": ValueType.DOUBLE,
        "float": ValueType.FLOAT,
        "real": ValueType.FLOAT,
        "decimal": ValueType.DECIMAL,
        "binary": ValueType.BYTES,
        "varbinary": ValueType.BYTES,
        "char": ValueType.STRING,
        "varchar": ValueType.STRING,
        "string": ValueType.STRING,
        "json": ValueType.JSON,
        "struct": ValueType.STRUCT,
        "row": ValueType.STRUCT,
        "map": ValueType.MAP,
        "uuid": ValueType.UUID,
        "ipaddress": ValueType.STRING,
    }

    return type_map.get(base_type, ValueType.UNKNOWN)
21192157

21202158

21212159
def pa_to_athena_value_type(pa_type: "pyarrow.DataType") -> str:

0 commit comments

Comments
 (0)