5 changes: 3 additions & 2 deletions docs/config_file.md
@@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`.
`columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method.
Format: List[Dict].

Example [{"name": "col1", "type": "str", "required": True, "default": None}]
Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}]
name - Column name;
type - Column type in string: int, str, float, bool.
required - bool, optional field. Defaults to True.
Default - Any data and optional.
default - Any data, optional field.
collection_type - Collection type, optional field. Supported: List. Default is None.

If you do not set columns, then column validation must be disabled (set to False).

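For example, a config with one scalar column and one list column might look like this (an illustrative sketch following the format above; the column names are hypothetical):

[
    {"name": "user_id", "type": "int", "required": True},
    {"name": "scores", "type": "float", "required": False, "default": None, "collection_type": "List"}
]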
2 changes: 1 addition & 1 deletion mlup/__init__.py
@@ -3,4 +3,4 @@
from mlup.web.app import MLupWebApp, WebAppConfig


__version__ = "0.2.2"
__version__ = "0.2.3"
110 changes: 93 additions & 17 deletions mlup/utils/interspection.py
@@ -2,7 +2,14 @@
import inspect
import logging
from enum import Enum
from typing import Callable, Any, Union, Optional, Type
from typing import Callable, Any, Union, Optional, Type, Generic, Sequence, Dict, Tuple

try:
from typing import get_args as typing_get_args, get_origin as typing_get_origin
except ImportError:
# Fallback for Python <= 3.7, where typing.get_args/get_origin are not available.
typing_get_args = lambda t: getattr(t, '__args__', ()) if t is not Generic else Generic # noqa: E731
typing_get_origin = lambda t: getattr(t, '__origin__', None) # noqa: E731

from mlup.constants import IS_X, THERE_IS_ARGS, DEFAULT_X_ARG_NAME, BinarizationType, LoadedFile
from mlup.utils.profiling import TimeProfiler
@@ -11,6 +18,77 @@
logger = logging.getLogger('mlup')


SUPPORTED_PRIMITIVE_TYPES = {
int: 'int',
float: 'float',
bool: 'bool',
str: 'str',
}


def _is_optional(field: Any) -> bool:
return typing_get_origin(field) is Union and type(None) in typing_get_args(field)


def _is_sequence(field: Any) -> bool:
_type_for_check = typing_get_origin(field)
_collection_types = (list, set, tuple)
try:
return ((field is not None and field in _collection_types)
or (_type_for_check is not None and issubclass(_type_for_check, Sequence)))
except TypeError:
# If _type_for_check is not a class object, e.g. for Optional[int].
return False


def parse_attributes_from_generic_type(
param: inspect.Parameter,
) -> Tuple[Dict[str, Any], bool]:
"""
Search for and return a primitive type from a single level of a Generic.
If no supported type is found, parsing is reported as failed.

:param inspect.Parameter param: Parameter that needs to be parsed.

:return: Attributes parsed from the Generic, and the parsing result.
If the returned bool is True, parsing failed; if False, it succeeded.
Example: ({"type": "int", "required": False, "collection_type": "List"}, False)
Key "type" is optional.
Key "collection_type" is optional.
:rtype: Tuple[Dict[str, Any], bool]

"""
result = {
'required': True,
}
_types_for_analyze = typing_get_args(param.annotation)

logger.debug(f"Analyze argument '{param.name}', attempt to pick up determine primitive type.")

if _is_optional(param.annotation):
result['required'] = False
if _is_sequence(param.annotation):
result['collection_type'] = 'List'
if len(_types_for_analyze) > 0 and _is_sequence(_types_for_analyze[0]):
result['collection_type'] = 'List'
_types_for_analyze = typing_get_args(_types_for_analyze[0])

for p in _types_for_analyze:
if p in SUPPORTED_PRIMITIVE_TYPES:
result['type'] = SUPPORTED_PRIMITIVE_TYPES[p]
break

_parse_error = False

if 'type' not in result:
logger.warning(f"Cannot determine primitive type for '{param.name}'.")
_parse_error = True

logger.debug(f"For argument '{param.name}' parsing result '{result}'")

return result, _parse_error
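
As a quick illustration (not part of the module), a parameter annotated Optional[List[int]] would be expected to parse like this:

import inspect
from typing import List, Optional

def predict(col6: Optional[List[int]] = None):
    ...

param = inspect.signature(predict).parameters['col6']
attrs, parse_error = parse_attributes_from_generic_type(param)
# attrs == {'required': False, 'collection_type': 'List', 'type': 'int'}
# parse_error == False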


def get_class_by_path(path_to_class: Union[str, Enum]) -> Any:
"""
Get class by path to class. Use importlib.import_module.
@@ -66,12 +144,6 @@ def example(a, b = 100, *, c: float = 123):
sign = inspect.signature(func)
arg_spec = inspect.getfullargspec(func)
result = []
types = {
int: 'int',
float: 'float',
bool: 'bool',
str: 'str',
}
is_there_args = False
logger.info(f'Analyzing arguments in {func}.')

@@ -93,23 +165,27 @@
'required': True,
}
if param_obj.annotation is not inspect._empty:
if param_obj.annotation in types:
param_data['type'] = types[param_obj.annotation]
if param_obj.annotation in SUPPORTED_PRIMITIVE_TYPES:
param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[param_obj.annotation]
else:
logger.warning(
f'Model predict argument has unsupported type {param_obj.annotation}. '
f'Skipping validation.'
)
_param_attributes, _parse_error = parse_attributes_from_generic_type(param_obj)
param_data.update(**_param_attributes)

if _parse_error:
logger.warning(
f'Model predict argument has unsupported type {param_obj.annotation}. '
f'Skipping validation.'
)

if param_obj.default is not inspect._empty:
param_data['required'] = False
param_data['default'] = param_obj.default
if 'type' not in param_data and type(param_obj.default) in types:
param_data['type'] = types[type(param_obj.default)]
if 'type' not in param_data and type(param_obj.default) in SUPPORTED_PRIMITIVE_TYPES:
param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[type(param_obj.default)]

if param_name.lower().strip() == 'x' and auto_detect_predict_params:
logger.info('Found X param in model params. Set collection_type List.')
param_data['type'] = 'List'
param_data['collection_type'] = 'List'
param_data[IS_X] = True
_found_X = True

@@ -121,7 +197,7 @@
f'X argument in predict method not found. '
f'For predict data use first argument with name "{result[0]["name"]}".'
)
result[0]['type'] = 'List'
result[0]['collection_type'] = 'List'
result[0][IS_X] = True
else:
logger.info('Not found arguments in predict method.')
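To make the new behavior concrete, here is a sketch of what analyze_method_params would be expected to report for a predict method with generic annotations (assuming X auto-detection is enabled; the exact keys follow the code above):

from typing import List, Optional

def predict(X: List[List[float]], threshold: Optional[float] = 0.5):
    ...

# analyze_method_params(predict, auto_detect_predict_params=True) would be
# expected to return something like:
# [
#     {'name': 'X', 'required': True, 'type': 'float',
#      'collection_type': 'List', IS_X: True},
#     {'name': 'threshold', 'required': False, 'default': 0.5, 'type': 'float'},
# ]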
13 changes: 11 additions & 2 deletions mlup/web/api_docs.py
@@ -35,10 +35,19 @@ def make_columns_object_openapi_scheme(src_columns: List[Dict]) -> Tuple[Dict, L
cols_openapi_config = {}
required_columns = []
for col_config in src_columns:
col_name, col_type = col_config["name"], col_config.get("type", "str")
col_name = col_config["name"]
col_required, col_default = col_config.get("required", True), col_config.get("default", None)

_col_config = {"type": _openapi_types_map[col_type.lower()]}
_col_config = {}
col_type = col_config.get("type", None)
if "collection_type" in col_config:
if col_type is not None:
_col_config["items"] = {"type": _openapi_types_map[col_type.lower()]}
_col_config["type"] = _openapi_types_map[col_config["collection_type"].lower()]
col_type = "list"
else:
# Fall back to "string" when no column type is configured.
_col_config["type"] = _openapi_types_map[col_type.lower()] if col_type else "string"

title = []
if col_default is not None:
title.append("Default")
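For instance, a column declared as {"name": "col6", "type": "int", "collection_type": "List"} would be expected to yield an array property along these lines (assuming the usual OpenAPI type mapping, e.g. "int" -> "integer" and "list" -> "array"):

"col6": {
    "type": "array",
    "items": {"type": "integer"}
}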
6 changes: 6 additions & 0 deletions mlup/web/api_validators.py
@@ -84,6 +84,9 @@ def make_map_pydantic_columns(
'str': str,
'list': list,
}
collection_types = {
'List': List,
}
__validators__ = {}
columns_pydantic_format = {}
# If None is passed, columns are taken from ml.columns.
Expand All @@ -102,6 +105,9 @@ def make_map_pydantic_columns(
f'Supported types {", ".join(column_types.keys())}.'
)
col_type = Any
# Wrap the primitive type in its collection type, e.g. int -> List[int].
if col_config.get('collection_type', None) in collection_types:
col_type = collection_types[col_config['collection_type']][col_type]

# Required
field_required_default_value = Field(...)
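The wrapping is equivalent to this minimal pydantic sketch (illustrative only; the column name is hypothetical):

from typing import List
from pydantic import Field, create_model

col_type = int                # resolved from column_types['int']
col_type = List[col_type]     # applied because collection_type == 'List'

Columns = create_model('Columns', col6=(col_type, Field(...)))
print(Columns(col6=[1, 2, 3]))  # col6=[1, 2, 3]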
102 changes: 82 additions & 20 deletions tests/unit_tests/ml/test_data_transformers.py
@@ -93,18 +93,19 @@ def test_transform_to_model_format_from_list(self):

def test_transform_to_model_format_from_dict_with_columns(self):
data = [
{'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
{'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52},
{'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53},
{'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54},
{'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55},
{'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]},
{'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]},
{'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]},
{'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]},
{'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]},
]
cols = [
{'name': 'col1', 'type': 'int'},
{'name': 'col2', 'type': 'int'},
{'name': 'col3', 'type': 'int'},
{'name': 'col4', 'type': 'int'},
{'name': 'col5', 'type': 'int'},
{'name': 'col6', 'type': 'int', 'collection_type': 'List'},
]
# Check create
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
@@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self):
{'name': 'col3', 'type': 'int'},
{'name': 'colstr', 'type': 'str'},
{'name': 'colbool', 'type': 'bool'},
{'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'},
]
data = [
[11, 21, 31, 'colstr', True],
[12, 22, 32, 'colstr', True],
[13, 23, 33, 'colstr', False],
[14, 24, 34, 'colstr', True],
[15, 25, 35, 'colstr', True],
[11, 21, 31, 'colstr', True, [41.0, 51.0]],
[12, 22, 32, 'colstr', True, [42.0, 52.0]],
[13, 23, 33, 'colstr', False, [43.0, 53.0]],
[14, 24, 34, 'colstr', True, [44.0, 54.0]],
[15, 25, 35, 'colstr', True, [45.0, 55.0]],
]
df = self.transformer_class().transform_to_model_format(data, columns=cols)

@@ -187,11 +189,11 @@

def test_transform_to_json_format(self):
data = [
{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True},
{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True},
{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False},
{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True},
{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True},
{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]},
{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]},
{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]},
{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]},
{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]},
]
df = pd.DataFrame(data=data)
trans_data = self.transformer_class().transform_to_json_format(df)
@@ -200,11 +202,11 @@

def test_transform_to_json_format_from_list(self):
data = [
[{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}],
[{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}],
[{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}],
[{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}],
[{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}],
[{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}],
[{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}],
[{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}],
[{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}],
[{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}],
]
dfs = [pd.DataFrame(data=d) for d in data]
trans_data = self.transformer_class().transform_to_json_format(dfs)
@@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self):
pred_d = self.transformer_class().transform_to_model_format(data)
assert np.array_equal(pred_d, np.array(data))

def test_transform_to_model_format_from_dict_with_list_columns(self):
data = [
{'col1': [11, 111], 'col2': [21, 221]},
{'col1': [12, 112], 'col2': [22, 222]},
{'col1': [13, 113], 'col2': [23, 223]},
{'col1': [14, 114], 'col2': [24, 224]},
{'col1': [15, 115], 'col2': [25, 225]},
]
cols = [
{'name': 'col1', 'type': 'int', 'collection_type': 'List'},
{'name': 'col2', 'type': 'int', 'collection_type': 'List'},
]
# Check create
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
assert np.array_equal(pred_d, np.array([list(v.values()) for v in data]))

# Check order by columns
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
assert np.array_equal(pred_d, np.array([list(v.values())[::-1] for v in data]))

def test_transform_to_model_format_from_dict_with_columns(self):
data = [
{'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self):
pred_d = self.transformer_class().transform_to_model_format(data)
assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data))

def test_transform_to_model_format_from_dict_with_list_columns(self):
data = [
{'col1': [11, 111], 'col2': [21, 221]},
{'col1': [12, 112], 'col2': [22, 222]},
{'col1': [13, 113], 'col2': [23, 223]},
{'col1': [14, 114], 'col2': [24, 224]},
{'col1': [15, 115], 'col2': [25, 225]},
]
cols = [
{'name': 'col1', 'type': 'int', 'collection_type': 'List'},
{'name': 'col2', 'type': 'int', 'collection_type': 'List'},
]
# Check create
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values()) for v in data]))

# Check order by columns
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values())[::-1] for v in data]))

def test_transform_to_model_format_from_dict_with_columns(self):
data = [
{'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self):
pred_d = self.transformer_class().transform_to_model_format(data)
assert is_equal_torch_tensors(pred_d, torch.tensor(data))

def test_transform_to_model_format_from_dict_with_list_columns(self):
data = [
{'col1': [11, 111], 'col2': [21, 221]},
{'col1': [12, 112], 'col2': [22, 222]},
{'col1': [13, 113], 'col2': [23, 223]},
{'col1': [14, 114], 'col2': [24, 224]},
{'col1': [15, 115], 'col2': [25, 225]},
]
cols = [
{'name': 'col1', 'type': 'int', 'collection_type': 'List'},
{'name': 'col2', 'type': 'int', 'collection_type': 'List'},
]
# Check create
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values()) for v in data]))

# Check order by columns
pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values())[::-1] for v in data]))

def test_transform_to_model_format_from_dict_with_columns(self):
data = [
{'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},