From 8861e36ee4f43b16cafd6a509cb7f7facecc3116 Mon Sep 17 00:00:00 2001 From: Mike Soennichsen Date: Wed, 29 Apr 2026 13:13:33 -0700 Subject: [PATCH 1/2] feat: add save_to to classify and section methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the smart save_to wire-up (PR #85) for the new classify and section endpoints. Both sync and async variants accept save_to: str | Path | None, with the same directory + full-.json-path behavior already shipped for parse, extract, and split. Reuses the existing _get_input_filename and _save_response helpers — no new utility code. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/landingai_ade/_client.py | 64 +++++++++++++++++++++-- tests/test_save_to.py | 98 ++++++++++++++++++++++++++++++++++-- 2 files changed, 155 insertions(+), 7 deletions(-) diff --git a/src/landingai_ade/_client.py b/src/landingai_ade/_client.py index cb8c846..8ec1269 100644 --- a/src/landingai_ade/_client.py +++ b/src/landingai_ade/_client.py @@ -324,6 +324,7 @@ def classify( document: Optional[FileTypes] | Omit = omit, document_url: Optional[str] | Omit = omit, model: Optional[str] | Omit = omit, + save_to: str | Path | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -356,6 +357,11 @@ def classify( model: Classification model version. Defaults to the latest. + save_to: Optional output path. If a directory, auto-generates the filename + (e.g. {input_file}_classify_output.json, or classify_output.json when no + input filename is available). If a full path ending in .json, saves there + directly. Parent directories are created automatically. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -364,6 +370,10 @@ def classify( timeout: Override the client-level default timeout for this request, in seconds """ + # Store original inputs for filename extraction + original_document = document + original_document_url = document_url + body = deepcopy_with_paths( { "classes": classes, @@ -378,7 +388,7 @@ def classify( # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self.post( + result = self.post( "/v1/ade/classify", body=maybe_transform(body, client_classify_params.ClientClassifyParams), files=files, @@ -387,6 +397,10 @@ def classify( ), cast_to=ClassifyResponse, ) + if save_to: + filename = _get_input_filename(original_document, original_document_url) + _save_response(save_to, filename, "classify", result) + return result def extract( self, @@ -667,6 +681,7 @@ def section( markdown: Union[FileTypes, str, None] | Omit = omit, markdown_url: Optional[str] | Omit = omit, model: Optional[str] | Omit = omit, + save_to: str | Path | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -696,6 +711,11 @@ def section( model: Section model version. Defaults to latest. + save_to: Optional output path. If a directory, auto-generates the filename + (e.g. {input_file}_section_output.json, or section_output.json when no + input filename is available). If a full path ending in .json, saves there + directly. Parent directories are created automatically. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -704,6 +724,10 @@ def section( timeout: Override the client-level default timeout for this request, in seconds """ + # Store original inputs for filename extraction + original_markdown = markdown + original_markdown_url = markdown_url + body = deepcopy_with_paths( { "guidelines": guidelines, @@ -718,7 +742,7 @@ def section( # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self.post( + result = self.post( "/v1/ade/section", body=maybe_transform(body, client_section_params.ClientSectionParams), files=files, @@ -727,6 +751,10 @@ def section( ), cast_to=SectionResponse, ) + if save_to: + filename = _get_input_filename(original_markdown, original_markdown_url) + _save_response(save_to, filename, "section", result) + return result def split( self, @@ -1014,6 +1042,7 @@ async def classify( document: Optional[FileTypes] | Omit = omit, document_url: Optional[str] | Omit = omit, model: Optional[str] | Omit = omit, + save_to: str | Path | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -1046,6 +1075,11 @@ async def classify( model: Classification model version. Defaults to the latest. + save_to: Optional output path. If a directory, auto-generates the filename + (e.g. {input_file}_classify_output.json, or classify_output.json when no + input filename is available). If a full path ending in .json, saves there + directly. Parent directories are created automatically. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1054,6 +1088,10 @@ async def classify( timeout: Override the client-level default timeout for this request, in seconds """ + # Store original inputs for filename extraction + original_document = document + original_document_url = document_url + body = deepcopy_with_paths( { "classes": classes, @@ -1068,7 +1106,7 @@ async def classify( # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self.post( + result = await self.post( "/v1/ade/classify", body=await async_maybe_transform(body, client_classify_params.ClientClassifyParams), files=files, @@ -1077,6 +1115,10 @@ async def classify( ), cast_to=ClassifyResponse, ) + if save_to: + filename = _get_input_filename(original_document, original_document_url) + _save_response(save_to, filename, "classify", result) + return result async def extract( self, @@ -1358,6 +1400,7 @@ async def section( markdown: Union[FileTypes, str, None] | Omit = omit, markdown_url: Optional[str] | Omit = omit, model: Optional[str] | Omit = omit, + save_to: str | Path | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -1387,6 +1430,11 @@ async def section( model: Section model version. Defaults to latest. + save_to: Optional output path. If a directory, auto-generates the filename + (e.g. {input_file}_section_output.json, or section_output.json when no + input filename is available). If a full path ending in .json, saves there + directly. Parent directories are created automatically. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1395,6 +1443,10 @@ async def section( timeout: Override the client-level default timeout for this request, in seconds """ + # Store original inputs for filename extraction + original_markdown = markdown + original_markdown_url = markdown_url + body = deepcopy_with_paths( { "guidelines": guidelines, @@ -1409,7 +1461,7 @@ async def section( # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self.post( + result = await self.post( "/v1/ade/section", body=await async_maybe_transform(body, client_section_params.ClientSectionParams), files=files, @@ -1418,6 +1470,10 @@ async def section( ), cast_to=SectionResponse, ) + if save_to: + filename = _get_input_filename(original_markdown, original_markdown_url) + _save_response(save_to, filename, "section", result) + return result async def split( self, diff --git a/tests/test_save_to.py b/tests/test_save_to.py index 37517e4..2546775 100644 --- a/tests/test_save_to.py +++ b/tests/test_save_to.py @@ -8,7 +8,7 @@ import pytest -from landingai_ade import AsyncLandingAIADE +from landingai_ade import AsyncLandingAIADE, LandingAIADE from landingai_ade._client import _save_response, _get_input_filename from landingai_ade._exceptions import LandingAiadeError @@ -128,7 +128,7 @@ def test_correct_filename_format(self, tmp_path: Path) -> None: mock_result = MagicMock() mock_result.to_json.return_value = "{}" - for method in ["parse", "extract", "split"]: + for method in ["parse", "extract", "split", "classify", "section"]: _save_response(tmp_path, "myinput", method, mock_result) expected_file = tmp_path / f"myinput_{method}_output.json" assert expected_file.exists(), f"Expected {expected_file} to exist" @@ -165,7 +165,7 @@ def test_output_filename_skips_redundant_prefix(self, tmp_path: Path) -> None: mock_result = MagicMock() mock_result.to_json.return_value = "{}" - for method in ["parse", "extract", "split"]: + for method in ["parse", "extract", "split", "classify", "section"]: _save_response(tmp_path, "output", method, mock_result) expected = tmp_path / f"{method}_output.json" assert expected.exists(), f"Expected {expected} to exist" @@ -266,6 +266,62 @@ async def test_async_split_save_to(self, tmp_path: Path, mock_response: MagicMoc assert (tmp_path / "doc_split_output.json").exists() + @pytest.mark.asyncio + async def test_async_classify_save_to_directory(self, tmp_path: Path, mock_response: MagicMock) -> None: + from unittest.mock import AsyncMock, patch + + async with AsyncLandingAIADE(apikey="test-key", base_url="http://localhost") as client: + with patch.object(client, "post", new_callable=AsyncMock, return_value=mock_response): + await client.classify( + classes=[{"class": "invoice"}], + document=Path("/path/to/doc.pdf"), + save_to=tmp_path, + ) + + assert (tmp_path / "doc_classify_output.json").exists() + + @pytest.mark.asyncio + async def test_async_classify_save_to_json_path(self, tmp_path: Path, mock_response: MagicMock) -> None: + from unittest.mock import AsyncMock, patch + + output_file = tmp_path / "custom.json" + async with AsyncLandingAIADE(apikey="test-key", base_url="http://localhost") as client: + with patch.object(client, "post", new_callable=AsyncMock, return_value=mock_response): + await client.classify( + classes=[{"class": "invoice"}], + document_url="https://example.com/doc.pdf", + save_to=output_file, + ) + + assert output_file.exists() + + @pytest.mark.asyncio + async def test_async_section_save_to_directory(self, tmp_path: Path, mock_response: MagicMock) -> None: + from unittest.mock import AsyncMock, patch + + async with AsyncLandingAIADE(apikey="test-key", base_url="http://localhost") as client: + with patch.object(client, "post", new_callable=AsyncMock, return_value=mock_response): + await client.section( + markdown=Path("/path/to/doc.md"), + save_to=tmp_path, + ) + + assert (tmp_path / "doc_section_output.json").exists() + + @pytest.mark.asyncio + async def test_async_section_save_to_with_string_input(self, tmp_path: Path, mock_response: MagicMock) -> None: + """Section commonly receives raw markdown strings — filename should fall back to 'output'.""" + from unittest.mock import AsyncMock, patch + + async with AsyncLandingAIADE(apikey="test-key", base_url="http://localhost") as client: + with patch.object(client, "post", new_callable=AsyncMock, return_value=mock_response): + await client.section( + markdown="# Heading\n\nbody content", + save_to=tmp_path, + ) + + assert (tmp_path / "section_output.json").exists() + @pytest.mark.asyncio async def test_async_no_save_when_save_to_none(self, tmp_path: Path, mock_response: MagicMock) -> None: from unittest.mock import AsyncMock, patch @@ -276,3 +332,39 @@ async def test_async_no_save_when_save_to_none(self, tmp_path: Path, mock_respon assert result is mock_response assert not list(tmp_path.iterdir()) + + +class TestSyncSaveTo: + """Tests that sync client methods accept save_to and save correctly for classify/section.""" + + @pytest.fixture + def mock_response(self) -> MagicMock: + mock = MagicMock() + mock.to_json.return_value = '{"result": "ok"}' + return mock + + def test_sync_classify_save_to_directory(self, tmp_path: Path, mock_response: MagicMock) -> None: + from unittest.mock import patch + + client = LandingAIADE(apikey="test-key", base_url="http://localhost") + with patch.object(client, "post", return_value=mock_response): + client.classify( + classes=[{"class": "invoice"}], + document=Path("/path/to/doc.pdf"), + save_to=tmp_path, + ) + + assert (tmp_path / "doc_classify_output.json").exists() + + def test_sync_section_save_to_json_path(self, tmp_path: Path, mock_response: MagicMock) -> None: + from unittest.mock import patch + + output_file = tmp_path / "toc.json" + client = LandingAIADE(apikey="test-key", base_url="http://localhost") + with patch.object(client, "post", return_value=mock_response): + client.section( + markdown=Path("/path/to/doc.md"), + save_to=output_file, + ) + + assert output_file.exists() From 4a799ed6ce4d1a87fb3b4ef1e7b275eb39fc9f99 Mon Sep 17 00:00:00 2001 From: Mike Soennichsen Date: Wed, 29 Apr 2026 13:47:53 -0700 Subject: [PATCH 2/2] chore: fix import ordering in test_save_to.py Ruff was complaining about an unsorted import block in tests/test_save_to.py. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_save_to.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_save_to.py b/tests/test_save_to.py index 2546775..cda93b5 100644 --- a/tests/test_save_to.py +++ b/tests/test_save_to.py @@ -8,7 +8,7 @@ import pytest -from landingai_ade import AsyncLandingAIADE, LandingAIADE +from landingai_ade import LandingAIADE, AsyncLandingAIADE from landingai_ade._client import _save_response, _get_input_filename from landingai_ade._exceptions import LandingAiadeError