-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathprofiler.py
More file actions
105 lines (85 loc) · 2.77 KB
/
profiler.py
File metadata and controls
105 lines (85 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import abc
from typing import Any, List, Optional
import pandas as pd
class Profile(abc.ABC):
    """
    Abstract interface for a dataset profile.

    A profile carries a set of rules / expectations that can be run against
    a dataset, and can be converted to and from a proto representation.

    The class derives from ``abc.ABC`` so that the ``@abc.abstractmethod``
    markers below are enforced: instantiating ``Profile`` itself, or a
    subclass that leaves any of these members unimplemented, raises
    ``TypeError``.
    """

    @abc.abstractmethod
    def validate(self, dataset: pd.DataFrame) -> "ValidationReport":
        """
        Run set of rules / expectations from current profile against given dataset.

        Return ValidationReport
        """
        ...

    @abc.abstractmethod
    def to_proto(self):
        """
        Return the proto representation of this profile.

        The concrete proto type is chosen by the implementing subclass.
        """
        ...

    @classmethod
    @abc.abstractmethod
    def from_proto(cls, proto) -> "Profile":
        """
        Build a profile from the given proto object.

        Intended as the inverse of ``to_proto``; the accepted proto type is
        defined by the implementing subclass.
        """
        ...
class Profiler(abc.ABC):
    """
    Abstract interface for an object that derives a ``Profile`` from a dataset.

    The class derives from ``abc.ABC`` so that the ``@abc.abstractmethod``
    markers below are enforced: instantiating ``Profiler`` itself, or a
    subclass that leaves any of these members unimplemented, raises
    ``TypeError``.
    """

    @abc.abstractmethod
    def analyze_dataset(self, dataset: pd.DataFrame) -> "Profile":
        """
        Generate Profile object with dataset's characteristics (with rules / expectations)
        from given dataset (as pandas dataframe).
        """
        ...

    @abc.abstractmethod
    def to_proto(self):
        """
        Return the proto representation of this profiler.

        The concrete proto type is chosen by the implementing subclass.
        """
        ...

    @classmethod
    @abc.abstractmethod
    def from_proto(cls, proto) -> "Profiler":
        """
        Build a profiler from the given proto object.

        Intended as the inverse of ``to_proto``; the accepted proto type is
        defined by the implementing subclass.
        """
        ...
class ValidationReport(abc.ABC):
    """
    Abstract interface for the outcome of validating a dataset.

    The class derives from ``abc.ABC`` so that the abstract properties below
    are enforced: instantiating ``ValidationReport`` itself, or a subclass
    that leaves either property unimplemented, raises ``TypeError``.
    """

    @property
    @abc.abstractmethod
    def is_success(self) -> bool:
        """
        Return whether validation was successful
        """
        ...

    @property
    @abc.abstractmethod
    def errors(self) -> List["ValidationError"]:
        """
        Return list of ValidationErrors if validation failed (is_success = false)
        """
        ...
class ValidationError:
    """
    Plain record describing one failed check on one column.

    Only ``check_name`` and ``column_name`` are required; every other field
    defaults to ``None``. Values are stored exactly as passed in, with no
    validation or conversion.
    """

    # NOTE(review): the exact meaning of the count / percent / observed
    # fields is set by whoever builds the error; this class only stores them.
    check_name: str
    column_name: str

    check_config: Optional[Any]

    missing_count: Optional[int]
    missing_percent: Optional[float]

    observed_value: Optional[float]
    unexpected_count: Optional[int]
    unexpected_percent: Optional[float]

    def __init__(
        self,
        check_name: str,
        column_name: str,
        check_config: Optional[Any] = None,
        missing_count: Optional[int] = None,
        missing_percent: Optional[float] = None,
        observed_value: Optional[float] = None,
        unexpected_count: Optional[int] = None,
        unexpected_percent: Optional[float] = None,
    ) -> None:
        """Store every argument on the instance under the same name."""
        # Identification of the failed check.
        self.check_name = check_name
        self.column_name = column_name
        self.check_config = check_config

        # Optional details reported alongside the failure.
        self.missing_count = missing_count
        self.missing_percent = missing_percent
        self.observed_value = observed_value
        self.unexpected_count = unexpected_count
        self.unexpected_percent = unexpected_percent

    def __repr__(self) -> str:
        """Return a short ``<ValidationError check:column>`` debug string."""
        return f"<ValidationError {self.check_name}:{self.column_name}>"

    def to_dict(self) -> dict:
        """Return all fields as a new dict keyed by attribute name."""
        return {
            "check_name": self.check_name,
            "column_name": self.column_name,
            "check_config": self.check_config,
            "missing_count": self.missing_count,
            "missing_percent": self.missing_percent,
            "observed_value": self.observed_value,
            "unexpected_count": self.unexpected_count,
            "unexpected_percent": self.unexpected_percent,
        }