Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions markdown_frames/spark_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
make_table,
get_column_names_types,
get_data_from_table,
get_python_type
get_python_type,
get_array_inside_type
)
from markdown_frames.type_definitions import (
STRING,
Expand Down Expand Up @@ -138,6 +139,6 @@ def _array_type(column_type: str) -> ArrayType:
column_type.
:returns: ArrayType
"""
inside = column_type[6:-1].strip()
inside = get_array_inside_type(column_type)

return ArrayType(_types_mapping(inside))
14 changes: 14 additions & 0 deletions markdown_frames/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import List, Any, Optional
from datetime import datetime
from ast import literal_eval
import re

from markdown_frames.type_definitions import (
NULL,
Expand Down Expand Up @@ -94,3 +95,16 @@ def get_python_type(value_type: List[str]) -> Optional[Any]:
return literal_eval(value)
else:
return None

def get_array_inside_type(column_type: str) -> Optional[str]:
    """
    Given a column_type string, extract the type
    declared inside the outermost ``array<...>``.
    :param column_type: string description of
        column_type, e.g. ``array<int>``.
    :returns: the inside type with surrounding
        whitespace stripped, or None when column_type
        does not start with an ``array<...>`` pattern.
    """
    # Raw string without the needless "\<" / "\>" escapes: those are
    # invalid escape sequences in a plain string literal and emit warnings.
    # The greedy ``.*`` runs up to the LAST ">", so a nested declaration
    # such as "array<array<str>>" keeps its inner brackets intact.
    match = re.match(r"array<(.*)>", column_type)
    if match is None:
        return None
    return match.group(1).strip()
22 changes: 22 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
get_column_names_types,
get_data_from_table,
get_python_type,
get_array_inside_type
)


Expand Down Expand Up @@ -217,3 +218,24 @@ def test_get_python_type():
assert output6 == expected6
assert output7 == expected7
assert output8 == expected8

def test_get_array_type():
    """
    Test the function that, given an array type
    string pattern, extracts the inside type
    """
    # (input type string, expected inside type)
    cases = [
        ("array<int>", "int"),
        ("array<int >", "int"),
        ("array<array<str>>", "array<str>"),
    ]

    # Compute every output first, then check them in declaration order.
    results = [get_array_inside_type(type_string) for type_string, _ in cases]

    for (_, expected), actual in zip(cases, results):
        assert actual == expected