Skip to content

Commit 5bbcec2

Browse files
committed
✨ add inference options
1 parent 13834be commit 5bbcec2

File tree

7 files changed

+88
-26
lines changed

7 files changed

+88
-26
lines changed

mindee/input/inference_parameters.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,16 @@ class InferenceParameters:
1111
model_id: str
1212
"""ID of the model, required."""
1313
rag: bool = False
14-
"""If set to `True`, will enable Retrieval-Augmented Generation."""
14+
"""Use Retrieval-Augmented Generation during inference."""
15+
raw_text: bool = False
16+
"""Extract the entire text from the document as strings, and fill the ``raw_text`` attribute."""
17+
polygon: bool = False
18+
"""Calculate bounding box polygons for values, and fill the ``locations`` attribute of fields"""
19+
confidence: bool = False
20+
"""
21+
Calculate confidence scores for values, and fill the ``confidence`` attribute of fields.
22+
Useful for automation.
23+
"""
1524
alias: Optional[str] = None
1625
"""Use an alias to link the file to your own DB. If empty, no alias will be used."""
1726
webhook_ids: Optional[List[str]] = None

mindee/mindee_http/mindee_api_v2.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ def req_post_inference_enqueue(
8484

8585
if params.rag:
8686
data["rag"] = "true"
87+
if params.raw_text:
88+
data["raw_text"] = "true"
89+
if params.confidence:
90+
data["confidence"] = "true"
91+
if params.polygon:
92+
data["polygon"] = "true"
8793
if params.webhook_ids and len(params.webhook_ids) > 0:
8894
data["webhook_ids"] = ",".join(params.webhook_ids)
8995
if params.alias and len(params.alias):

mindee/parsing/v2/inference.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from typing import Optional
2-
31
from mindee.parsing.common.string_dict import StringDict
2+
from mindee.parsing.v2.inference_active_options import ActiveOptions
43
from mindee.parsing.v2.inference_file import InferenceFile
54
from mindee.parsing.v2.inference_model import InferenceModel
65
from mindee.parsing.v2.inference_result import InferenceResult
@@ -9,25 +8,29 @@
98
class Inference:
    """Top-level inference object built from a V2 API payload."""

    id: str
    """Identifier of the inference, read from the ``id`` key."""
    model: InferenceModel
    """Model section of the payload, wrapped in ``InferenceModel``."""
    file: InferenceFile
    """File section of the payload, wrapped in ``InferenceFile``."""
    result: InferenceResult
    """Result section of the payload, wrapped in ``InferenceResult``."""
    active_options: ActiveOptions
    """Active-options section of the payload, wrapped in ``ActiveOptions``."""

    def __init__(self, raw_response: StringDict):
        """
        Build the inference from its raw payload.

        :param raw_response: Mapping holding the ``id``, ``model``, ``file``,
            ``result`` and ``active_options`` keys.
        :raises KeyError: If one of those keys is absent; every key is
            read by direct indexing, none is optional.
        """
        self.id = raw_response["id"]

        # Each sub-section is handed to its dedicated wrapper, in payload order.
        model_section = raw_response["model"]
        self.model = InferenceModel(model_section)

        file_section = raw_response["file"]
        self.file = InferenceFile(file_section)

        result_section = raw_response["result"]
        self.result = InferenceResult(result_section)

        options_section = raw_response["active_options"]
        self.active_options = ActiveOptions(options_section)

    def __str__(self) -> str:
        """Render the title, then model, file, active options and result."""
        title = "Inference\n#########\n"
        sections = (self.model, self.file, self.active_options, self.result)
        # Sections are separated by one blank line; the text ends with a newline.
        body = "\n\n".join(f"{section}" for section in sections)
        return f"{title}{body}\n"
mindee/parsing/v2/inference_active_options.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from mindee.parsing.common.string_dict import StringDict
2+
3+
4+
class ActiveOptions:
    """
    Option flags reported for an inference.

    Every attribute is copied as-is from the key of the same name in the
    raw payload; no value is defaulted or converted.
    """

    raw_text: bool
    polygon: bool
    confidence: bool
    rag: bool

    def __init__(self, raw_response: StringDict):
        """
        Read the option flags from the raw payload.

        :param raw_response: Mapping holding the ``raw_text``, ``polygon``,
            ``confidence`` and ``rag`` keys.
        :raises KeyError: If one of those keys is absent.
        """
        # Keys are read in declaration order, one attribute per key.
        for option in ("raw_text", "polygon", "confidence", "rag"):
            setattr(self, option, raw_response[option])

    def __str__(self) -> str:
        """Render the options as an underlined title and one line per flag."""
        rows = [
            "Active Options",
            "==============",
            f":Raw Text: {self.raw_text}",
            f":Polygon: {self.polygon}",
            f":Confidence: {self.confidence}",
            f":RAG: {self.rag}",
        ]
        return "\n".join(rows)

mindee/parsing/v2/inference_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@ def __init__(self, raw_response: StringDict) -> None:
1919
self.raw_text = RawText(raw_response["raw_text"])
2020

2121
def __str__(self) -> str:
    """Return the underlined ``Fields`` title followed directly by the fields."""
    # No separator is inserted between the underline and the fields text.
    return f"Fields\n======{self.fields}"

mindee/parsing/v2/raw_text.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ class RawText:
1212

1313
def __init__(self, raw_response: StringDict):
    """
    Wrap each entry of the ``pages`` key in a ``RawTextPage``.

    :param raw_response: Raw payload; a missing ``pages`` key is treated
        as an empty list.
    """
    raw_pages = raw_response.get("pages", [])
    self.pages = list(map(RawTextPage, raw_pages))
15+
16+
def __str__(self) -> str:
    """Return the ``content`` of every page, separated by one blank line."""
    page_texts = (page.content for page in self.pages)
    return "\n\n".join(page_texts)

tests/test_client_v2_integration.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,18 @@ def test_parse_file_empty_multiple_pages_must_succeed(
3737
file & model metadata.
3838
"""
3939
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
40-
assert input_path.exists(), f"sample file missing: {input_path}"
4140

42-
input_doc = PathInput(input_path)
43-
options = InferenceParameters(findoc_model_id)
41+
input_source = PathInput(input_path)
42+
params = InferenceParameters(
43+
model_id=findoc_model_id,
44+
rag=False,
45+
raw_text=False,
46+
polygon=False,
47+
confidence=False,
48+
)
4449

4550
response: InferenceResponse = v2_client.enqueue_and_get_inference(
46-
input_doc, options
51+
input_source, params
4752
)
4853

4954
assert response is not None
@@ -65,20 +70,26 @@ def test_parse_file_filled_single_page_must_succeed(
6570
Upload a filled single-page JPEG and verify that common fields are present.
6671
"""
6772
input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg"
68-
assert input_path.exists(), f"sample file missing: {input_path}"
6973

70-
input_doc = PathInput(input_path)
71-
options = InferenceParameters(findoc_model_id)
74+
input_source = PathInput(input_path)
75+
params = InferenceParameters(
76+
model_id=findoc_model_id,
77+
rag=False,
78+
raw_text=False,
79+
polygon=False,
80+
confidence=False,
81+
)
7282

7383
response: InferenceResponse = v2_client.enqueue_and_get_inference(
74-
input_doc, options
84+
input_source, params
7585
)
7686

7787
assert response is not None
7888
assert response.inference is not None
7989

8090
assert response.inference.file is not None
8191
assert response.inference.file.name == "default_sample.jpg"
92+
assert response.inference.file.page_count == 1
8293

8394
assert response.inference.model is not None
8495
assert response.inference.model.id == findoc_model_id
@@ -96,13 +107,12 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None:
96107
Using an invalid model identifier must trigger a 422 HTTP error.
97108
"""
98109
input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf"
99-
assert input_path.exists()
100110

101-
input_doc = PathInput(input_path)
102-
options = InferenceParameters("INVALID MODEL ID")
111+
input_source = PathInput(input_path)
112+
params = InferenceParameters(model_id="INVALID MODEL ID")
103113

104114
with pytest.raises(MindeeHTTPErrorV2) as exc_info:
105-
v2_client.enqueue_inference(input_doc, options)
115+
v2_client.enqueue_inference(input_source, params)
106116

107117
exc: MindeeHTTPErrorV2 = exc_info.value
108118
assert exc.status == 422
@@ -119,10 +129,16 @@ def test_url_input_source_must_not_raise_errors(
119129
"""
120130
url = os.getenv("MINDEE_V2_SE_TESTS_BLANK_PDF_URL")
121131

122-
input_doc = UrlInputSource(url)
123-
options = InferenceParameters(findoc_model_id)
132+
input_source = UrlInputSource(url)
133+
params = InferenceParameters(
134+
model_id=findoc_model_id,
135+
rag=False,
136+
raw_text=False,
137+
polygon=False,
138+
confidence=False,
139+
)
124140
response: InferenceResponse = v2_client.enqueue_and_get_inference(
125-
input_doc, options
141+
input_source, params
126142
)
127143
assert response is not None
128144
assert response.inference is not None

0 commit comments

Comments
 (0)