forked from feast-dev/feast
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprofiler.py
More file actions
88 lines (69 loc) · 1.98 KB
/
Copy pathprofiler.py
File metadata and controls
88 lines (69 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import abc
from typing import Any, List, Optional
import pandas as pd
class Profile(abc.ABC):
    """
    Abstract interface for a dataset profile.

    A profile can be validated against a dataset and converted to / from a
    proto object. The class derives from ``abc.ABC`` so that the
    ``@abc.abstractmethod`` markers below are actually enforced: neither this
    class nor a subclass that leaves any of these methods unimplemented can
    be instantiated.
    """

    @abc.abstractmethod
    def validate(self, dataset: pd.DataFrame) -> "ValidationReport":
        """
        Run set of rules / expectations from current profile against given dataset.

        Return ValidationReport
        """
        ...

    @abc.abstractmethod
    def to_proto(self):
        """
        Convert this profile to a proto object.

        The concrete proto type is not fixed by this interface; it is chosen
        by the implementing subclass.
        """
        ...

    @classmethod
    @abc.abstractmethod
    def from_proto(cls, proto) -> "Profile":
        """
        Alternate constructor: build a Profile from a proto object.
        """
        ...
class Profiler(abc.ABC):
    """
    Abstract interface for an object that derives a Profile from a dataset.

    The class derives from ``abc.ABC`` so that the ``@abc.abstractmethod``
    markers below are actually enforced: neither this class nor a subclass
    that leaves any of these methods unimplemented can be instantiated.
    """

    @abc.abstractmethod
    def analyze_dataset(self, dataset: pd.DataFrame) -> "Profile":
        """
        Generate Profile object with dataset's characteristics (with rules / expectations)
        from given dataset (as pandas dataframe).
        """
        ...

    @abc.abstractmethod
    def to_proto(self):
        """
        Convert this profiler to a proto object.

        The concrete proto type is not fixed by this interface; it is chosen
        by the implementing subclass.
        """
        ...

    @classmethod
    @abc.abstractmethod
    def from_proto(cls, proto) -> "Profiler":
        """
        Alternate constructor: build a Profiler from a proto object.
        """
        ...
class ValidationReport(abc.ABC):
    """
    Abstract interface for the outcome of validating a dataset.

    The class derives from ``abc.ABC`` so that the ``@abc.abstractmethod``
    markers below are actually enforced: neither this class nor a subclass
    that leaves either property unimplemented can be instantiated.
    """

    @property
    @abc.abstractmethod
    def is_success(self) -> bool:
        """
        Return whether validation was successful
        """
        ...

    @property
    @abc.abstractmethod
    def errors(self) -> List["ValidationError"]:
        """
        Return list of ValidationErrors if validation failed (is_success = false)
        """
        ...
class ValidationError:
    """
    Record describing a single failed check on a single column.

    This is a plain data holder with no base class; in particular it is not
    an ``Exception`` subclass, so it cannot be raised.
    """

    # Name of the check that failed.
    check_name: str
    # Name of the column the check was applied to.
    column_name: str
    # Optional configuration of the check; the type is left open (Any).
    check_config: Optional[Any]
    # NOTE(review): presumably the number of values that failed the check;
    # confirm against the code that builds these objects.
    missing_count: Optional[int]
    # NOTE(review): presumably the share of values that failed the check;
    # the scale (0-1 vs 0-100) is not visible here, confirm.
    missing_percent: Optional[float]

    def __init__(
        self,
        check_name: str,
        column_name: str,
        check_config: Optional[Any] = None,
        missing_count: Optional[int] = None,
        missing_percent: Optional[float] = None,
    ) -> None:
        """
        Store the given values on the instance unchanged.

        Only ``check_name`` and ``column_name`` are required; the remaining
        fields default to ``None``.
        """
        # Required identification of the failure.
        self.check_name, self.column_name = check_name, column_name
        # Optional details.
        self.missing_percent = missing_percent
        self.missing_count = missing_count
        self.check_config = check_config

    def __repr__(self) -> str:
        """Return a short tag containing only the check and column names."""
        return f"<ValidationError {self.check_name}:{self.column_name}>"