From facb79b404138eb194b67d5fa111c13e62cf4863 Mon Sep 17 00:00:00 2001 From: mdevolde Date: Sat, 27 Jun 2026 12:39:17 +0200 Subject: [PATCH 1/2] test: split tests in categs, add bench tests --- pyproject.toml | 25 +++- pytest.ini | 5 + tests/benchmarks/__init__.py | 1 + tests/benchmarks/conftest.py | 17 +++ tests/benchmarks/test_bench_check.py | 81 ++++++++++++ tests/integration/__init__.py | 1 + tests/integration/conftest.py | 17 +++ tests/{ => integration}/test_api_public.py | 8 +- tests/{ => integration}/test_cli.py | 127 +++++-------------- tests/{ => integration}/test_config.py | 35 +---- tests/integration/test_download.py | 94 ++++++++++++++ tests/{ => integration}/test_match.py | 10 +- tests/{ => integration}/test_server_local.py | 2 +- tests/property/__init__.py | 1 + tests/property/conftest.py | 17 +++ tests/property/test_prop_config.py | 85 +++++++++++++ tests/property/test_prop_safe_zip.py | 74 +++++++++++ tests/property/test_prop_utils.py | 20 +++ tests/unit/__init__.py | 1 + tests/unit/conftest.py | 17 +++ tests/unit/test_cli_args.py | 60 +++++++++ tests/unit/test_config_validation.py | 37 ++++++ tests/{ => unit}/test_download.py | 101 +-------------- tests/{ => unit}/test_safe_zip.py | 2 +- uv.lock | 48 +++++++ 25 files changed, 650 insertions(+), 236 deletions(-) create mode 100644 tests/benchmarks/__init__.py create mode 100644 tests/benchmarks/conftest.py create mode 100644 tests/benchmarks/test_bench_check.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/conftest.py rename tests/{ => integration}/test_api_public.py (93%) rename tests/{ => integration}/test_cli.py (68%) rename tests/{ => integration}/test_config.py (81%) create mode 100644 tests/integration/test_download.py rename tests/{ => integration}/test_match.py (95%) rename tests/{ => integration}/test_server_local.py (98%) create mode 100644 tests/property/__init__.py create mode 100644 tests/property/conftest.py create mode 100644 
tests/property/test_prop_config.py create mode 100644 tests/property/test_prop_safe_zip.py create mode 100644 tests/property/test_prop_utils.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/conftest.py create mode 100644 tests/unit/test_cli_args.py create mode 100644 tests/unit/test_config_validation.py rename tests/{ => unit}/test_download.py (82%) rename tests/{ => unit}/test_safe_zip.py (99%) diff --git a/pyproject.toml b/pyproject.toml index 47957ec..182c0cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,9 @@ changelog = "https://github.com/jxmorris12/language_tool_python/blob/master/CHAN [dependency-groups] tests = [ "pytest", + "pytest-benchmark", "pytest-cov", + "hypothesis", ] docs = [ @@ -141,9 +143,10 @@ ignore = [ ] [tool.ruff.lint.per-file-ignores] -"tests/*.py" = [ - "S101", # Need to use assert statements in tests - "SLF001" # Need to use private members of the library for testing +"tests/**/*.py" = [ + "S101", # Need to use assert statements in tests + "SLF001", # Need to use private members of the library for testing + "RUF001", # LanguageTool output contains typographic quotes (‘’“”) ] "src/language_tool_python/__main__.py" = ["T201"] # Allow usage of print in the CLI entry point @@ -170,3 +173,19 @@ warn_return_any = true warn_unreachable = true warn_unused_configs = true warn_unused_ignores = true + +[[tool.mypy.overrides]] +module = ["tests.benchmarks.*"] +# pytest-benchmark is untyped; relax Any restrictions for benchmark files only +disallow_any_unimported = false +disallow_any_expr = false +disallow_any_explicit = false +disallow_any_decorated = false + +[[tool.mypy.overrides]] +module = ["tests.property.*"] +# hypothesis is untyped; relax Any restrictions for property test files only +disallow_any_unimported = false +disallow_any_expr = false +disallow_any_explicit = false +disallow_any_decorated = false diff --git a/pytest.ini b/pytest.ini index 8170a2f..2da9216 100644 --- a/pytest.ini +++ 
b/pytest.ini @@ -1,6 +1,11 @@ [pytest] addopts = -vra --cov=src --cov-report=html --cov-report=xml testpaths = tests +markers = + unit: fast, isolated tests with no external dependencies + integration: tests that require a live LanguageTool server or network + property: property-based tests using Hypothesis + perf: performance benchmark tests using pytest-benchmark [coverage:run] source = src diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py new file mode 100644 index 0000000..1dff63c --- /dev/null +++ b/tests/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Benchmark tests for the language_tool_python library.""" diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py new file mode 100644 index 0000000..6cebb40 --- /dev/null +++ b/tests/benchmarks/conftest.py @@ -0,0 +1,17 @@ +"""Configuration for the benchmark test suite.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +def pytest_collection_modifyitems( + items: list[pytest.Item], +) -> None: + """Apply the 'perf' marker to all tests collected from this directory.""" + benchmarks_dir = Path(__file__).parent + for item in items: + if item.path.is_relative_to(benchmarks_dir): + item.add_marker(pytest.mark.perf) diff --git a/tests/benchmarks/test_bench_check.py b/tests/benchmarks/test_bench_check.py new file mode 100644 index 0000000..2cca935 --- /dev/null +++ b/tests/benchmarks/test_bench_check.py @@ -0,0 +1,81 @@ +"""Benchmark tests for LanguageTool grammar checking performance. + +Run with: pytest tests/benchmarks/ -v +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import language_tool_python + +if TYPE_CHECKING: + from collections.abc import Generator + + from pytest_benchmark.fixture import BenchmarkFixture + +_SHORT_TEXT = "This is a sentence with some erors in it. 
" +_MEDIUM_TEXT = (_SHORT_TEXT * 20).strip() +_LONG_TEXT = (_SHORT_TEXT * 100).strip() + + +@pytest.fixture(scope="module") +def tool() -> Generator[language_tool_python.LanguageTool, None, None]: + """Provide a LanguageTool instance shared across benchmarks in this module.""" + with language_tool_python.LanguageTool("en-US") as t: + yield t + + +@pytest.fixture(scope="module") +def cached_tool() -> Generator[language_tool_python.LanguageTool, None, None]: + """Provide a pipeline-caching LanguageTool instance for cache benchmarks.""" + with language_tool_python.LanguageTool( + "en-US", + config={"cacheSize": 1000, "pipelineCaching": True}, + ) as t: + yield t + + +def test_bench_check_short_text( + benchmark: BenchmarkFixture, + tool: language_tool_python.LanguageTool, +) -> None: + """Benchmark grammar checking on a short sentence (~38 characters).""" + benchmark(tool.check, _SHORT_TEXT) + + +def test_bench_check_medium_text( + benchmark: BenchmarkFixture, + tool: language_tool_python.LanguageTool, +) -> None: + """Benchmark grammar checking on medium-length text (~840 characters).""" + benchmark(tool.check, _MEDIUM_TEXT) + + +def test_bench_check_long_text( + benchmark: BenchmarkFixture, + tool: language_tool_python.LanguageTool, +) -> None: + """Benchmark grammar checking on long text (~4200 characters).""" + benchmark(tool.check, _LONG_TEXT) + + +def test_bench_correct_short_text( + benchmark: BenchmarkFixture, + tool: language_tool_python.LanguageTool, +) -> None: + """Benchmark automatic text correction on a short sentence.""" + benchmark(tool.correct, _SHORT_TEXT) + + +def test_bench_check_with_pipeline_cache( + benchmark: BenchmarkFixture, + cached_tool: language_tool_python.LanguageTool, +) -> None: + """Benchmark grammar checking with pipeline caching enabled. + + Compare with test_bench_check_short_text to measure cache speedup. 
+ """ + benchmark(cached_tool.check, _SHORT_TEXT) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..7ac8aa9 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +"""Integration tests for the language_tool_python library.""" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..3628ef0 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,17 @@ +"""Configuration for the integration test suite.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +def pytest_collection_modifyitems( + items: list[pytest.Item], +) -> None: + """Apply the 'integration' marker to all tests collected from this directory.""" + integration_dir = Path(__file__).parent + for item in items: + if item.path.is_relative_to(integration_dir): + item.add_marker(pytest.mark.integration) diff --git a/tests/test_api_public.py b/tests/integration/test_api_public.py similarity index 93% rename from tests/test_api_public.py rename to tests/integration/test_api_public.py index 1788741..307c5d6 100644 --- a/tests/test_api_public.py +++ b/tests/integration/test_api_public.py @@ -1,4 +1,4 @@ -"""Tests for the public API functionality.""" +"""Integration tests for the public API functionality.""" import pytest @@ -34,7 +34,7 @@ def test_remote_es() -> None: 'INCORRECT_EXPRESSIONS', 'rule_issue_type': 'grammar', 'sentence': 'LanguageTool le ayudará a afrentar algunas dificultades propias de la escritura.'}), Match({'rule_id': 'PRON_HABER_PARTICIPIO', - 'message': 'El v. \u2018haber\u2019 se escribe con hache.', + 'message': 'El v. ‘haber’ se escribe con hache.', 'replacements': ['ha'], 'offset_in_context': 43, 'context': '...ificultades propias de la escritura. 
Se a hecho un esfuerzo para detectar errores...', 'offset': 107, 'error_length': 1, 'category': @@ -50,8 +50,8 @@ def test_remote_es() -> None: 'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'rule_id': 'Y_E_O_U', 'message': 'Cuando precede a palabras - que comienzan por \u2018i\u2019, la conjunción \u2018y\u2019 se - transforma en \u2018e\u2019.', 'replacements': ['e'], + que comienzan por ‘i’, la conjunción ‘y’ se + transforma en ‘e’.', 'replacements': ['e'], 'offset_in_context': 43, 'context': '...ctar errores tipográficos, ortograficos y incluso gramaticales. También algunos e...', 'offset': 176, 'error_length': 1, 'category': 'GRAMMAR', 'rule_issue_type': diff --git a/tests/test_cli.py b/tests/integration/test_cli.py similarity index 68% rename from tests/test_cli.py rename to tests/integration/test_cli.py index 18da908..d101f66 100644 --- a/tests/test_cli.py +++ b/tests/integration/test_cli.py @@ -1,4 +1,4 @@ -"""Tests for the command-line interface (CLI) functionality.""" +"""Integration tests for the CLI using real LanguageTool server instances.""" import io import sys @@ -7,7 +7,39 @@ import pytest import language_tool_python -from language_tool_python.__main__ import main, parse_args +from language_tool_python.__main__ import main + + +def main_with_stdin(argv: list[str], stdin: str) -> int: + """Execute the main CLI with simulated stdin input. + + :param argv: Command-line arguments to pass to the main function. + :param stdin: Input text to simulate as stdin. + :return: Exit code returned by the main function. + :rtype: int + """ + old_stdin = sys.stdin + sys.stdin = io.StringIO(stdin) + try: + return main(argv) + finally: + sys.stdin = old_stdin + + +@pytest.fixture(scope="module") +def remote_server() -> Generator[tuple[str, int], None, None]: + """Fixture that provides a remote LanguageTool server for testing. 
+ + This fixture initializes a LanguageTool instance and yields its host and port, + ensuring proper cleanup after all tests in the module complete. + + :return: A tuple containing the server host and port (host, port). + :rtype: Generator[Tuple[str, int], None, None] + """ + with language_tool_python.LanguageTool("en-US") as tool: + host = tool._host + port = tool._port + yield host, port @pytest.mark.parametrize( @@ -89,22 +121,6 @@ def test_cli_exit_codes( assert code != 0 -@pytest.fixture(scope="module") -def remote_server() -> Generator[tuple[str, int], None, None]: - """Fixture that provides a remote LanguageTool server for testing. - - This fixture initializes a LanguageTool instance and yields its host and port, - ensuring proper cleanup after all tests in the module complete. - - :return: A tuple containing the server host and port (host, port). - :rtype: Generator[Tuple[str, int], None, None] - """ - with language_tool_python.LanguageTool("en-US") as tool: - host = tool._host - port = tool._port - yield host, port - - def test_cli_remote_ok(remote_server: tuple[str, int]) -> None: """Test the CLI with a remote server using valid input text. @@ -155,78 +171,3 @@ def test_cli_remote_error(remote_server: tuple[str, int]) -> None: "This is noot okay.\n", ) assert code != 0 - - -def test_parse_args_enabled_only_with_enable_categories() -> None: - """Test that --enabled-only is accepted when only --enable-categories is provided. - - :raises AssertionError: If parse_args raises an error for this valid combination. - """ - args = parse_args(["-l", "en-US", "--enabled-only", "-E", "TYPOS", "file.txt"]) - assert args.enabled_only is True - assert args.enable_categories == {"TYPOS"} - - -def test_parse_args_enabled_only_rejects_disable_categories() -> None: - """Test that --enabled-only cannot be combined with --disable-categories. - - :raises SystemExit: Expected, as argparse calls sys.exit on error. 
- """ - with pytest.raises(SystemExit): - parse_args( - ["-l", "en-US", "--enabled-only", "-e", "RULE", "-D", "TYPOS", "file.txt"] - ) - - -def test_parse_args_enabled_only_requires_enable_or_enable_categories() -> None: - """Test that --enabled-only requires at least --enable or --enable-categories. - - :raises SystemExit: Expected, as argparse calls sys.exit on error. - """ - with pytest.raises(SystemExit): - parse_args(["-l", "en-US", "--enabled-only", "file.txt"]) - - -def test_parse_args_categories() -> None: - """Test that --disable-categories and --enable-categories are parsed correctly. - - :raises AssertionError: If the parsed category sets do not match the expected - values. - """ - args = parse_args( - ["-l", "en-US", "-D", "TYPOS,GRAMMAR", "-E", "PUNCTUATION", "file.txt"] - ) - assert args.disable_categories == {"TYPOS", "GRAMMAR"} - assert args.enable_categories == {"PUNCTUATION"} - - -def test_parse_args_categories_multiple_flags() -> None: - """Test that repeated -D/-E flags accumulate into the same set. - - :raises AssertionError: If the category sets do not accumulate correctly. - """ - args = parse_args( - ["-l", "en-US", "-D", "TYPOS", "-D", "GRAMMAR", "-E", "PUNCTUATION", "file.txt"] - ) - assert args.disable_categories == {"TYPOS", "GRAMMAR"} - assert args.enable_categories == {"PUNCTUATION"} - - -def main_with_stdin(argv: list[str], stdin: str) -> int: - """Execute the main CLI with simulated stdin input. - - This utility function temporarily replaces sys.stdin with a StringIO object - containing the provided input, executes the main CLI function, and then restores the - original stdin. - - :param argv: Command-line arguments to pass to the main function. - :param stdin: Input text to simulate as stdin. - :return: Exit code returned by the main function. 
- :rtype: int - """ - old_stdin = sys.stdin - sys.stdin = io.StringIO(stdin) - try: - return main(argv) - finally: - sys.stdin = old_stdin diff --git a/tests/test_config.py b/tests/integration/test_config.py similarity index 81% rename from tests/test_config.py rename to tests/integration/test_config.py index 087d5d3..3a2ba42 100644 --- a/tests/test_config.py +++ b/tests/integration/test_config.py @@ -1,4 +1,4 @@ -"""Tests for the configuration options of LanguageTool.""" +"""Integration tests for LanguageTool configuration options (require a local server).""" import re import time @@ -6,7 +6,6 @@ import pytest import language_tool_python -from language_tool_python.config_file import ConfigValue, LanguageToolConfig from language_tool_python.exceptions import LanguageToolError @@ -175,35 +174,3 @@ def test_disabled_rule_in_config() -> None: text = "He realised that the organization was in jeopardy." matches = tool.check(text) assert len(matches) == 0 - - -@pytest.mark.parametrize( - "config", - [ - {"blockedReferrers": "example.com\ntrustXForwardForHeader=true"}, - {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\rrequestLimit=0"]}, - {"lang-en\ntrustXForwardForHeader": "true"}, - {"lang-en": "custom-word\nrequestLimit=0"}, - ], -) -def test_config_rejects_line_break_injection(config: dict[str, ConfigValue]) -> None: - """Test that config serialization cannot be escaped with CR/LF characters.""" - with pytest.raises(ValueError, match="cannot contain line breaks"): - LanguageToolConfig(config) - - -@pytest.mark.parametrize( - "config", - [ - {"blockedReferrers": "example.com\\"}, - {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\\"]}, - {"lang-en\\": "true"}, - {"lang-en": "custom-word\\"}, - ], -) -def test_config_rejects_odd_trailing_backslashes( - config: dict[str, ConfigValue], -) -> None: - """Test that config serialization cannot escape the line ending with a backslash.""" - with pytest.raises(ValueError, match="odd number of backslashes"): - 
LanguageToolConfig(config) diff --git a/tests/integration/test_download.py b/tests/integration/test_download.py new file mode 100644 index 0000000..fd74f50 --- /dev/null +++ b/tests/integration/test_download.py @@ -0,0 +1,94 @@ +"""Integration tests for LanguageTool download and version management (real network).""" + +from datetime import datetime, timedelta, timezone + +import pytest + +import language_tool_python +from language_tool_python.exceptions import LanguageToolError, PathError + + +def test_install_inexistent_version() -> None: + """Test errors when downloading a non-existent LanguageTool version. + + This test verifies that the tool correctly handles invalid version numbers by + raising a LanguageToolError when trying to initialize with a version that does not + exist. + + :raises AssertionError: If LanguageToolError is not raised for an invalid version. + """ + with pytest.raises(LanguageToolError): + language_tool_python.LanguageTool(language_tool_download_version="0.0") + + +def test_install_too_old_version() -> None: + """Test that attempting to download a too-old LanguageTool version raises an error. + + This test verifies that the tool correctly handles versions that are no longer + supported by raising a PathError when trying to initialize with an outdated version. + + :raises AssertionError: If PathError is not raised for a too-old version. + """ + with pytest.raises(PathError): + language_tool_python.LanguageTool(language_tool_download_version="3.9") + + +def test_inexistent_language() -> None: + """Test that creating a LanguageTag with an invalid language code raises an error. + + This test verifies that the LanguageTag constructor correctly validates language + codes and raises a ValueError when given a language code that is not supported. + + :raises AssertionError: If ValueError is not raised for an invalid language code. 
+ """ + with ( + language_tool_python.LanguageTool("en-US") as tool, + pytest.raises(ValueError, match="unsupported language"), + ): + language_tool_python.LanguageTag("xx-XX", tool._get_languages()) + + +def test_install_oldest_supported_version() -> None: + """Test that downloading the oldest supported LanguageTool version works correctly. + + This test verifies that the tool can successfully download and initialize with the + oldest version that is still supported. + + :raises AssertionError: If the tool fails to initialize with the oldest supported + version. + """ + try: + with language_tool_python.LanguageTool( + "en-US", + language_tool_download_version="4.0", + ) as tool: + assert tool.language_tool_download_version == "4.0" + except LanguageToolError: + pytest.fail("Failed to download or initialize the oldest supported version.") + + +def test_install_snapshot_version() -> None: + """Test that downloading the snapshot version of LanguageTool works correctly. + + This test verifies that the tool can successfully download and initialize with the + snapshot from three days ago. + + :raises AssertionError: If the tool fails to initialize with the snapshot version. + """ + try: + with language_tool_python.LanguageTool( + "en-US", + language_tool_download_version=( + (datetime.now(timezone.utc) - timedelta(days=3)).strftime("%Y%m%d") + ), + ) as tool: + assert tool.language_tool_download_version == ( + datetime.now(timezone.utc) - timedelta(days=3) + ).strftime("%Y%m%d") + except LanguageToolError: + pytest.skip( + ( + "Failed to download or initialize the snapshot version. This may be " + "due to a missing snapshot for the expected date." 
+ ), + ) diff --git a/tests/test_match.py b/tests/integration/test_match.py similarity index 95% rename from tests/test_match.py rename to tests/integration/test_match.py index a81713d..074f5f8 100644 --- a/tests/test_match.py +++ b/tests/integration/test_match.py @@ -1,4 +1,4 @@ -"""Tests for the Match functionality of LanguageTool.""" +"""Integration tests for the Match functionality of LanguageTool.""" from typing import TypedDict @@ -122,15 +122,15 @@ def test_match() -> None: expected format. """ with language_tool_python.LanguageTool("en-US") as tool: - text = "A sentence with a error in the Hitchhiker\u2019s Guide tot he Galaxy" + text = "A sentence with a error in the Hitchhiker’s Guide tot he Galaxy" matches = tool.check(text) assert len(matches) == EXPECTED_MATCH_COUNT assert str(matches[0]) == ( "Offset 16, length 1, Rule ID: EN_A_VS_AN\n" - "Message: Use “an” instead of \u2018a\u2019 if the following word starts " - "with a vowel sound, e.g. \u2018an article\u2019, \u2018an hour\u2019.\n" + "Message: Use “an” instead of ‘a’ if the following word starts " + "with a vowel sound, e.g. ‘an article’, ‘an hour’.\n" "Suggestion: an\n" - "A sentence with a error in the Hitchhiker\u2019s Guide tot he ..." + "A sentence with a error in the Hitchhiker’s Guide tot he ..." 
"\n ^" ) diff --git a/tests/test_server_local.py b/tests/integration/test_server_local.py similarity index 98% rename from tests/test_server_local.py rename to tests/integration/test_server_local.py index 3731952..31b4b47 100644 --- a/tests/test_server_local.py +++ b/tests/integration/test_server_local.py @@ -1,4 +1,4 @@ -"""Tests for the local server functionality of LanguageTool.""" +"""Integration tests for the local server functionality of LanguageTool.""" from __future__ import annotations diff --git a/tests/property/__init__.py b/tests/property/__init__.py new file mode 100644 index 0000000..2e5bdfe --- /dev/null +++ b/tests/property/__init__.py @@ -0,0 +1 @@ +"""Property-based tests for the language_tool_python library.""" diff --git a/tests/property/conftest.py b/tests/property/conftest.py new file mode 100644 index 0000000..6297f26 --- /dev/null +++ b/tests/property/conftest.py @@ -0,0 +1,17 @@ +"""Configuration for the property-based test suite.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +def pytest_collection_modifyitems( + items: list[pytest.Item], +) -> None: + """Apply the 'property' marker to all tests collected from this directory.""" + property_dir = Path(__file__).parent + for item in items: + if item.path.is_relative_to(property_dir): + item.add_marker(pytest.mark.property) diff --git a/tests/property/test_prop_config.py b/tests/property/test_prop_config.py new file mode 100644 index 0000000..41b95a9 --- /dev/null +++ b/tests/property/test_prop_config.py @@ -0,0 +1,85 @@ +"""Property-based tests for LanguageToolConfig input validation. + +These tests use Hypothesis to verify that injection-protection invariants +hold for any input, not just the handwritten examples in unit tests. 
+""" + +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st + +from language_tool_python.config_file import LanguageToolConfig + +_LINEBREAK_CHARS = ["\n", "\r", "\r\n"] + + +@given( + before=st.text(), + linebreak=st.sampled_from(_LINEBREAK_CHARS), + after=st.text(), +) +@settings(max_examples=200) +def test_prop_config_value_with_linebreak_always_raises( + before: str, + linebreak: str, + after: str, +) -> None: + """Any config value containing CR or LF must raise ValueError. + + The string is constructed as ``before + linebreak + after`` to guarantee + the presence of a line-break character without relying on filter(). + + :param before: Arbitrary text before the line-break. + :param linebreak: A CR, LF, or CRLF sequence. + :param after: Arbitrary text after the line-break. + :raises AssertionError: If ValueError is not raised. + """ + value = before + linebreak + after + with pytest.raises(ValueError, match="line breaks"): + LanguageToolConfig({"blockedReferrers": value}) + + +@given( + prefix=st.text(alphabet=st.characters(blacklist_characters="\r\n\\")), + count=st.integers(min_value=1, max_value=5), +) +@settings(max_examples=200) +def test_prop_config_odd_trailing_backslashes_always_raise( + prefix: str, + count: int, +) -> None: + r"""Any config value ending with an odd number of backslashes must raise ValueError. + + The value is constructed as ``prefix + '\\\\' * (2*count - 1)`` to guarantee + the trailing backslash count is always odd (1, 3, 5, 7, or 9). + + :param prefix: A string with no backslashes or line-break characters. + :param count: Determines the odd backslash count: ``2*count - 1``. + :raises AssertionError: If ValueError is not raised. 
+ """ + value = prefix + "\\" * (2 * count - 1) + with pytest.raises(ValueError, match="backslash"): + LanguageToolConfig({"blockedReferrers": value}) + + +@given( + key_before=st.text(alphabet=st.characters(blacklist_characters="\r\n")), + linebreak=st.sampled_from(_LINEBREAK_CHARS), + key_after=st.text(alphabet=st.characters(blacklist_characters="\r\n")), +) +@settings(max_examples=200) +def test_prop_config_key_with_linebreak_always_raises( + key_before: str, + linebreak: str, + key_after: str, +) -> None: + """Any config key containing CR or LF must raise ValueError. + + :param key_before: Text before the line-break in the key. + :param linebreak: A CR, LF, or CRLF sequence. + :param key_after: Text after the line-break in the key. + :raises AssertionError: If ValueError is not raised. + """ + key = key_before + linebreak + key_after + with pytest.raises(ValueError, match="line breaks"): + LanguageToolConfig({key: "valid_value"}) diff --git a/tests/property/test_prop_safe_zip.py b/tests/property/test_prop_safe_zip.py new file mode 100644 index 0000000..ae3f992 --- /dev/null +++ b/tests/property/test_prop_safe_zip.py @@ -0,0 +1,74 @@ +"""Property-based tests for the safe ZIP extractor path-traversal protection.""" + +import contextlib +import io +import shutil +import uuid +import zipfile +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path + +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st + +from language_tool_python._internals.safe_zip import SafeZipExtractor +from language_tool_python.exceptions import PathError + +_TRAVERSAL_PREFIXES = ["../", "..\\", "/", "C:/", "..\\..\\"] + + +def _make_zip_payload(files: dict[str, bytes]) -> bytes: + """Create an in-memory ZIP payload for testing.""" + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + for name, data in files.items(): + zf.writestr(name, data) + return buf.getvalue() + + +@contextmanager +def 
_temp_dir() -> Iterator[Path]: + """Create a temporary dir inside the project workspace to avoid perm issues.""" + root = Path.cwd() / ".test_prop_safe_zip_tmp" + path = root / uuid.uuid4().hex + path.mkdir(parents=True) + try: + yield path + finally: + shutil.rmtree(path, ignore_errors=True) + with contextlib.suppress(OSError): + root.rmdir() + + +@given( + traversal=st.sampled_from(_TRAVERSAL_PREFIXES), + suffix=st.text( + alphabet=st.characters(whitelist_categories=("Ll", "Lu", "Nd")), + min_size=1, + ), +) +@settings(max_examples=300) +def test_prop_safe_zip_path_traversal_always_rejected( + traversal: str, + suffix: str, +) -> None: + """Any ZIP member whose name begins with a path-traversal prefix must be rejected. + + Checks that ``SafeZipExtractor`` raises ``PathError`` for filenames like + ``../evil``, ``/etc/passwd``, or ``C:/Windows/file`` regardless of the suffix. + + :param traversal: A path-traversal prefix (e.g. ``../``, ``/``). + :param suffix: Alphanumeric suffix appended after the traversal prefix. + :raises AssertionError: If ``PathError`` is not raised for the unsafe member name. + """ + filename = traversal + suffix + payload = _make_zip_payload({filename: b"payload"}) + + with ( + _temp_dir() as dest, + zipfile.ZipFile(io.BytesIO(payload)) as zf, + pytest.raises(PathError, match="Unsafe ZIP member"), + ): + SafeZipExtractor().extractall(zf, dest) diff --git a/tests/property/test_prop_utils.py b/tests/property/test_prop_utils.py new file mode 100644 index 0000000..4bc7f20 --- /dev/null +++ b/tests/property/test_prop_utils.py @@ -0,0 +1,20 @@ +"""Property-based tests for the LanguageTool utility functions.""" + +from hypothesis import given, settings +from hypothesis import strategies as st + +import language_tool_python + + +@given(text=st.text()) +@settings(max_examples=500) +def test_prop_correct_with_empty_matches_is_identity(text: str) -> None: + """correct(text, []) must always return the original text unchanged. 
+ + This verifies the fundamental contract that applying zero corrections + is a no-op, regardless of the text content (empty, unicode, emojis...). + + :param text: Arbitrary string generated by Hypothesis. + :raises AssertionError: If the corrected text differs from the input. + """ + assert language_tool_python.utils.correct(text, []) == text diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..1733800 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +"""Unit tests for the language_tool_python library.""" diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..65fcecd --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,17 @@ +"""Configuration for the unit test suite.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +def pytest_collection_modifyitems( + items: list[pytest.Item], +) -> None: + """Apply the 'unit' marker to all tests collected from this directory.""" + unit_dir = Path(__file__).parent + for item in items: + if item.path.is_relative_to(unit_dir): + item.add_marker(pytest.mark.unit) diff --git a/tests/unit/test_cli_args.py b/tests/unit/test_cli_args.py new file mode 100644 index 0000000..6f79066 --- /dev/null +++ b/tests/unit/test_cli_args.py @@ -0,0 +1,60 @@ +"""Unit tests for the CLI argument parser.""" + +import pytest + +from language_tool_python.__main__ import parse_args + + +def test_parse_args_enabled_only_with_enable_categories() -> None: + """Test that --enabled-only is accepted when only --enable-categories is provided. + + :raises AssertionError: If parse_args raises an error for this valid combination. 
+ """ + args = parse_args(["-l", "en-US", "--enabled-only", "-E", "TYPOS", "file.txt"]) + assert args.enabled_only is True + assert args.enable_categories == {"TYPOS"} + + +def test_parse_args_enabled_only_rejects_disable_categories() -> None: + """Test that --enabled-only cannot be combined with --disable-categories. + + :raises SystemExit: Expected, as argparse calls sys.exit on error. + """ + with pytest.raises(SystemExit): + parse_args( + ["-l", "en-US", "--enabled-only", "-e", "RULE", "-D", "TYPOS", "file.txt"] + ) + + +def test_parse_args_enabled_only_requires_enable_or_enable_categories() -> None: + """Test that --enabled-only requires at least --enable or --enable-categories. + + :raises SystemExit: Expected, as argparse calls sys.exit on error. + """ + with pytest.raises(SystemExit): + parse_args(["-l", "en-US", "--enabled-only", "file.txt"]) + + +def test_parse_args_categories() -> None: + """Test that --disable-categories and --enable-categories are parsed correctly. + + :raises AssertionError: If the parsed category sets do not match the expected + values. + """ + args = parse_args( + ["-l", "en-US", "-D", "TYPOS,GRAMMAR", "-E", "PUNCTUATION", "file.txt"] + ) + assert args.disable_categories == {"TYPOS", "GRAMMAR"} + assert args.enable_categories == {"PUNCTUATION"} + + +def test_parse_args_categories_multiple_flags() -> None: + """Test that repeated -D/-E flags accumulate into the same set. + + :raises AssertionError: If the category sets do not accumulate correctly. 
+ """ + args = parse_args( + ["-l", "en-US", "-D", "TYPOS", "-D", "GRAMMAR", "-E", "PUNCTUATION", "file.txt"] + ) + assert args.disable_categories == {"TYPOS", "GRAMMAR"} + assert args.enable_categories == {"PUNCTUATION"} diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py new file mode 100644 index 0000000..aa46834 --- /dev/null +++ b/tests/unit/test_config_validation.py @@ -0,0 +1,37 @@ +"""Unit tests for LanguageToolConfig input validation and injection protection.""" + +import pytest + +from language_tool_python.config_file import ConfigValue, LanguageToolConfig + + +@pytest.mark.parametrize( + "config", + [ + {"blockedReferrers": "example.com\ntrustXForwardForHeader=true"}, + {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\rrequestLimit=0"]}, + {"lang-en\ntrustXForwardForHeader": "true"}, + {"lang-en": "custom-word\nrequestLimit=0"}, + ], +) +def test_config_rejects_line_break_injection(config: dict[str, ConfigValue]) -> None: + """Test that config serialization cannot be escaped with CR/LF characters.""" + with pytest.raises(ValueError, match="cannot contain line breaks"): + LanguageToolConfig(config) + + +@pytest.mark.parametrize( + "config", + [ + {"blockedReferrers": "example.com\\"}, + {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\\"]}, + {"lang-en\\": "true"}, + {"lang-en": "custom-word\\"}, + ], +) +def test_config_rejects_odd_trailing_backslashes( + config: dict[str, ConfigValue], +) -> None: + """Test that config serialization cannot escape the line ending with a backslash.""" + with pytest.raises(ValueError, match="odd number of backslashes"): + LanguageToolConfig(config) diff --git a/tests/test_download.py b/tests/unit/test_download.py similarity index 82% rename from tests/test_download.py rename to tests/unit/test_download.py index d6de0f0..ad8ec2d 100644 --- a/tests/test_download.py +++ b/tests/unit/test_download.py @@ -1,4 +1,7 @@ -"""Tests for the download/language functionality of LanguageTool.""" 
+"""Unit tests for download logic, URL construction, HTTP handling, and integrity checks. + +These tests use mocks and monkeypatching to avoid real network requests. +""" import contextlib import hashlib @@ -10,7 +13,7 @@ import zipfile from collections.abc import Iterator from contextlib import contextmanager -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from pathlib import Path from unittest.mock import patch @@ -23,7 +26,7 @@ _LTP_MAX_DOWNLOAD_BYTES_ENV_VAR, LocalLanguageTool, ) -from language_tool_python.exceptions import LanguageToolError, PathError +from language_tool_python.exceptions import PathError EXPECTED_DOWNLOAD_BYTES_OVERRIDE = 123 @@ -87,52 +90,9 @@ def workspace_temp_dir() -> Iterator[Path]: root.rmdir() -def test_install_inexistent_version() -> None: - """Test errors when downloading a non-existent LanguageTool version. - - This test verifies that the tool correctly handles invalid version numbers by - raising a LanguageToolError when trying to initialize with a version that does not - exist. - - :raises AssertionError: If LanguageToolError is not raised for an invalid version. - """ - with pytest.raises(LanguageToolError): - language_tool_python.LanguageTool(language_tool_download_version="0.0") - - -def test_install_too_old_version() -> None: - """Test that attempting to download a too-old LanguageTool version raises an error. - - This test verifies that the tool correctly handles versions that are no longer - supported by raising a PathError when trying to initialize with an outdated version. - - :raises AssertionError: If PathError is not raised for a too-old version. - """ - with pytest.raises(PathError): - language_tool_python.LanguageTool(language_tool_download_version="3.9") - - -def test_inexistent_language() -> None: - """Test that creating a LanguageTag with an invalid language code raises an error. 
- - This test verifies that the LanguageTag constructor correctly validates language - codes and raises a ValueError when given a language code that is not supported. - - :raises AssertionError: If ValueError is not raised for an invalid language code. - """ - with ( - language_tool_python.LanguageTool("en-US") as tool, - pytest.raises(ValueError, match="unsupported language"), - ): - language_tool_python.LanguageTag("xx-XX", tool._get_languages()) - - def test_http_get_403_forbidden() -> None: """Test that http_get raises PathError when receiving a 403 Forbidden status code. - This test verifies that the function correctly handles forbidden access errors when - attempting to download files. - :raises AssertionError: If PathError is not raised for a 403 status code. """ mock_response = MockDownloadResponse(b"", status_code=403) @@ -153,9 +113,6 @@ def test_http_get_403_forbidden() -> None: def test_http_get_other_error_codes() -> None: """Test PathError handling for unexpected HTTP status codes. - This test verifies that the function correctly handles different HTTP error codes - like 500 (Internal Server Error), 503 (Service Unavailable), etc. - :raises AssertionError: If PathError is not raised for error status codes. """ error_codes = [500, 502, 503, 504] @@ -562,49 +519,3 @@ def test_latest_snapshot_download_renames_archive_root_to_current_date( local_language_tool.download() get_mock.assert_not_called() - - -def test_install_oldest_supported_version() -> None: - """Test that downloading the oldest supported LanguageTool version works correctly. - - This test verifies that the tool can successfully download and initialize with the - oldest version that is still supported. - - :raises AssertionError: If the tool fails to initialize with the oldest supported - version. 
- """ - try: - with language_tool_python.LanguageTool( - "en-US", - language_tool_download_version="4.0", - ) as tool: - assert tool.language_tool_download_version == "4.0" - except LanguageToolError: - pytest.fail("Failed to download or initialize the oldest supported version.") - - -def test_install_snapshot_version() -> None: - """Test that downloading the snapshot version of LanguageTool works correctly. - - This test verifies that the tool can successfully download and initialize with the - snapshot of yesterday. - - :raises AssertionError: If the tool fails to initialize with the snapshot version. - """ - try: - with language_tool_python.LanguageTool( - "en-US", - language_tool_download_version=( - (datetime.now(timezone.utc) - timedelta(days=3)).strftime("%Y%m%d") - ), - ) as tool: - assert tool.language_tool_download_version == ( - datetime.now(timezone.utc) - timedelta(days=3) - ).strftime("%Y%m%d") - except LanguageToolError: - pytest.skip( - ( - "Failed to download or initialize the snapshot version. This may be " - "due to a missing snapshot for the expected date." 
- ), - ) diff --git a/tests/test_safe_zip.py b/tests/unit/test_safe_zip.py similarity index 99% rename from tests/test_safe_zip.py rename to tests/unit/test_safe_zip.py index 9741845..328c7d4 100644 --- a/tests/test_safe_zip.py +++ b/tests/unit/test_safe_zip.py @@ -1,4 +1,4 @@ -"""Tests for safe ZIP extraction.""" +"""Unit tests for safe ZIP extraction.""" import contextlib import hashlib diff --git a/uv.lock b/uv.lock index dcdd4ab..07ecf5e 100644 --- a/uv.lock +++ b/uv.lock @@ -392,6 +392,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/b2/50e9b292b5cac13e9e81272c7171301abc753a60460d21505b606e15cf21/furo-2025.12.19-py3-none-any.whl", hash = "sha256:bb0ead5309f9500130665a26bee87693c41ce4dbdff864dbfb6b0dae4673d24f", size = 339262, upload-time = "2025-12-19T17:34:38.905Z" }, ] +[[package]] +name = "hypothesis" +version = "6.155.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/aa/9a91a4addf285702a98713da44b3581799539426436617bfb8914478c166/hypothesis-6.155.6.tar.gz", hash = "sha256:7569e1897690336c85d49d8391b49ec6ab83d951009515bfc29faebbac286cf5", size = 478038, upload-time = "2026-06-19T13:21:23.379Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/a9/4c17e962c2e9cbc314bb579ed2e2b2da45d7b6b942aab6948d14d85abfea/hypothesis-6.155.6-py3-none-any.whl", hash = "sha256:a96d9a29f6bbc8ccac39dd84e140892da76765464929f401a4181b90c20c9ad1", size = 544521, upload-time = "2026-06-19T13:21:20.934Z" }, +] + [[package]] name = "idna" version = "3.18" @@ -457,7 +470,9 @@ quality = [ { name = "ruff" }, ] tests = [ + { name = "hypothesis" }, { name = "pytest" }, + { name = "pytest-benchmark" }, { name = "pytest-cov" }, ] types = [ @@ -486,7 +501,9 @@ quality = [ { name = "ruff", specifier = "==0.15.16" }, ] tests = [ + { name = "hypothesis" }, { name = 
"pytest" }, + { name = "pytest-benchmark" }, { name = "pytest-cov" }, ] types = [ @@ -788,6 +805,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -815,6 +841,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/24/25/1de2678b631f5a49215c6c96fff41ba892b0a34df68d6d80292b1b48aa7f/pytest-9.1.1-py3-none-any.whl", hash = "sha256:37a86b45efb9a47a61a36449063e8e18d0cab3161329fc099eb21783169c4f0c", size = 386536, upload-time = "2026-06-19T10:58:31.347Z" }, ] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py-cpuinfo" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" }, +] + [[package]] name = "pytest-cov" version = "7.1.0" @@ -887,6 +926,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/07/2ebca9b11fb9be7340a818d8d6f63feaebb146be2c4afbd6061701d6df6e/snowballstemmer-3.1.1-py3-none-any.whl", hash = "sha256:7e207fa178741da09cdee59d3ecec3827ad5f92b1fc5c9ff3755b639f71f5752", size = 104164, upload-time = "2026-06-03T00:56:38.614Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "soupsieve" version = "2.8.4" From 42c9c4e7c43ea9fb5d9d2a1ec06bc02831a88fd1 Mon Sep 17 00:00:00 2001 From: mdevolde Date: Sat, 27 Jun 2026 19:19:17 +0200 Subject: [PATCH 2/2] test: add unit tests to increase coverage --- .github/workflows/test.yml | 2 +- .gitignore | 3 + pyproject.toml | 14 +- pytest.ini | 2 +- src/language_tool_python/__main__.py | 4 +- src/language_tool_python/_internals/compat.py | 4 +- src/language_tool_python/config_file.py | 2 +- src/language_tool_python/download_lt.py | 2 +- tests/unit/test_api_types.py | 64 +++ tests/unit/test_cli_unit.py | 195 
++++++++ tests/unit/test_config_unit.py | 235 ++++++++++ tests/unit/test_download_unit.py | 305 +++++++++++++ tests/unit/test_internals_utils.py | 224 ++++++++++ tests/unit/test_language_tag.py | 168 +++++++ tests/unit/test_match.py | 421 ++++++++++++++++++ tests/unit/test_utils.py | 218 +++++++++ 16 files changed, 1843 insertions(+), 20 deletions(-) create mode 100644 tests/unit/test_api_types.py create mode 100644 tests/unit/test_cli_unit.py create mode 100644 tests/unit/test_config_unit.py create mode 100644 tests/unit/test_download_unit.py create mode 100644 tests/unit/test_internals_utils.py create mode 100644 tests/unit/test_language_tag.py create mode 100644 tests/unit/test_match.py create mode 100644 tests/unit/test_utils.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a0cd765..3a5ca41 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,7 +35,7 @@ jobs: - os: ubuntu-26.04 python-version: "3.14" - os: ubuntu-26.04 - python-version: "3.15.0-beta.2" + python-version: "3.15.0-beta.3" - os: macos-26 python-version: "3.14" - os: windows-2025 diff --git a/.gitignore b/.gitignore index e15106e..3f03577 100644 --- a/.gitignore +++ b/.gitignore @@ -204,6 +204,9 @@ cython_debug/ # Ruff stuff: .ruff_cache/ +# Pytest tmp_path base directory (project-relative to avoid Windows temp permission issues) +.pytest_tmp/ + # PyPI configuration file .pypirc diff --git a/pyproject.toml b/pyproject.toml index 182c0cd..e540e9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,18 +174,8 @@ warn_unreachable = true warn_unused_configs = true warn_unused_ignores = true -[[tool.mypy.overrides]] -module = ["tests.benchmarks.*"] -# pytest-benchmark is untyped; relax Any restrictions for benchmark files only -disallow_any_unimported = false -disallow_any_expr = false -disallow_any_explicit = false -disallow_any_decorated = false - [[tool.mypy.overrides]] module = ["tests.property.*"] -# hypothesis is untyped; relax Any 
restrictions for property test files only -disallow_any_unimported = false -disallow_any_expr = false -disallow_any_explicit = false +# hypothesis decorators contain Any expressions, so we need to disable the following checks for tests using hypothesis disallow_any_decorated = false +disallow_any_expr = false diff --git a/pytest.ini b/pytest.ini index 2da9216..cbd524a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = -vra --cov=src --cov-report=html --cov-report=xml +addopts = -vra --cov=src --cov-report=html --cov-report=xml --basetemp=.pytest_tmp testpaths = tests markers = unit: fast, isolated tests with no external dependencies diff --git a/src/language_tool_python/__main__.py b/src/language_tool_python/__main__.py index 8ee561a..8300df2 100644 --- a/src/language_tool_python/__main__.py +++ b/src/language_tool_python/__main__.py @@ -58,7 +58,7 @@ def _read_project_version(pyproject: Path) -> str: __version__ = version("language_tool_python") # If the package is not installed in the environment, # read the version from pyproject.toml -except PackageNotFoundError: +except PackageNotFoundError: # pragma: no cover project_root = Path(__file__).resolve().parent.parent pyproject = project_root / "pyproject.toml" __version__ = _read_project_version(pyproject) @@ -258,7 +258,7 @@ def __call__( cli_args.disable_categories.update(rule_values) elif self.dest == "enable_categories": cli_args.enable_categories.update(rule_values) - else: + else: # pragma: no cover err = f"unexpected rules destination: {self.dest}" raise ValueError(err) diff --git a/src/language_tool_python/_internals/compat.py b/src/language_tool_python/_internals/compat.py index be623b1..9dc0631 100644 --- a/src/language_tool_python/_internals/compat.py +++ b/src/language_tool_python/_internals/compat.py @@ -13,11 +13,11 @@ if sys.version_info >= (3, 11): from tomllib import loads as toml_loads else: - from tomli import loads as toml_loads + from tomli import loads as toml_loads 
# pragma: no cover if sys.version_info >= (3, 13): from warnings import deprecated else: - from typing_extensions import deprecated + from typing_extensions import deprecated # pragma: no cover __all__ = ["deprecated", "toml_loads"] diff --git a/src/language_tool_python/config_file.py b/src/language_tool_python/config_file.py index 8b73dbe..c7e703a 100644 --- a/src/language_tool_python/config_file.py +++ b/src/language_tool_python/config_file.py @@ -158,7 +158,7 @@ def _path_validator(v: PathLike[str] | str) -> None: if not p.exists(): err = f"path does not exist: {p}" raise PathError(err) - if not p.is_file() and not p.is_dir(): + if not p.is_file() and not p.is_dir(): # pragma: no cover err = f"path is not a file/directory: {p}" raise PathError(err) diff --git a/src/language_tool_python/download_lt.py b/src/language_tool_python/download_lt.py index 65defa0..1ec3330 100644 --- a/src/language_tool_python/download_lt.py +++ b/src/language_tool_python/download_lt.py @@ -385,7 +385,7 @@ def download(self) -> None: :raises NotImplementedError: Always, unless implemented by a subclass. 
""" - raise NotImplementedError + raise NotImplementedError # pragma: no cover def _get_remote_zip( self, diff --git a/tests/unit/test_api_types.py b/tests/unit/test_api_types.py new file mode 100644 index 0000000..af2b7ce --- /dev/null +++ b/tests/unit/test_api_types.py @@ -0,0 +1,64 @@ +"""Unit tests for _internals/api_types.py TypeGuard helpers.""" + +from language_tool_python._internals.api_types import ( + is_check_response, + is_language_info, +) + + +def test_is_language_info_valid() -> None: + """Accepts a well-formed LanguageInfo dict.""" + assert is_language_info({"code": "en", "longCode": "en-US", "name": "English"}) + + +def test_is_language_info_not_dict() -> None: + """Rejects non-dict values.""" + assert not is_language_info("not a dict") + assert not is_language_info(42) + assert not is_language_info(None) + assert not is_language_info(["code", "longCode", "name"]) + + +def test_is_language_info_missing_field() -> None: + """Rejects dicts with missing required fields.""" + assert not is_language_info({"code": "en", "longCode": "en-US"}) + assert not is_language_info({"code": "en", "name": "English"}) + assert not is_language_info({}) + + +def test_is_language_info_wrong_type() -> None: + """Rejects dicts with non-string field values.""" + assert not is_language_info({"code": 1, "longCode": "en-US", "name": "English"}) + assert not is_language_info({"code": "en", "longCode": None, "name": "English"}) + + +def test_is_check_response_valid() -> None: + """Accepts a well-formed CheckResponse dict.""" + assert is_check_response( + { + "matches": [], + "language": {"code": "en"}, + "warnings": {"incompleteResults": False}, + } + ) + + +def test_is_check_response_not_dict() -> None: + """Rejects non-dict values.""" + assert not is_check_response("not a dict") + assert not is_check_response(None) + assert not is_check_response(123) + + +def test_is_check_response_missing_field() -> None: + """Rejects dicts with missing required fields.""" + assert not 
is_check_response({"matches": [], "language": {}}) + assert not is_check_response({"matches": [], "warnings": {}}) + assert not is_check_response({}) + + +def test_is_check_response_wrong_type() -> None: + """Rejects dicts with wrong field types.""" + assert not is_check_response({"matches": "[]", "language": {}, "warnings": {}}) + assert not is_check_response({"matches": [], "language": "en", "warnings": {}}) + assert not is_check_response({"matches": [], "language": {}, "warnings": "none"}) diff --git a/tests/unit/test_cli_unit.py b/tests/unit/test_cli_unit.py new file mode 100644 index 0000000..33af934 --- /dev/null +++ b/tests/unit/test_cli_unit.py @@ -0,0 +1,195 @@ +"""Unit tests for the CLI helper functions in __main__.py.""" + +from __future__ import annotations + +import io +from pathlib import Path + +import pytest + +from language_tool_python.__main__ import ( + CliArgs, + _read_project_version, + get_input_text, + get_remote_server, + get_rules, + get_text, + parse_args, + print_exception, +) + + +class TestGetRules: + """Tests for the get_rules() rule-string parser.""" + + def test_comma_separated(self) -> None: + """Comma-separated rule IDs are returned as a set.""" + assert get_rules("RULE_A,RULE_B") == {"RULE_A", "RULE_B"} + + def test_uppercases(self) -> None: + """Rule IDs are uppercased.""" + assert get_rules("rule_a") == {"RULE_A"} + + def test_hyphen_allowed(self) -> None: + """Hyphens inside rule IDs are preserved.""" + assert get_rules("MORFOLOGIK-RULE") == {"MORFOLOGIK-RULE"} + + def test_whitespace_separated(self) -> None: + """Whitespace-separated rule IDs are each returned.""" + assert get_rules("RULE_A RULE_B") == {"RULE_A", "RULE_B"} + + def test_empty_string(self) -> None: + """Empty input returns an empty set.""" + assert get_rules("") == set() + + +class TestParseArgsEnabledOnly: + """Tests for the --enabled-only CLI argument validation.""" + + def test_enabled_only_with_disable_raises(self) -> None: + """--enabled-only combined with 
--disable causes SystemExit.""" + with pytest.raises(SystemExit): + parse_args( + [ + "-l", + "en-US", + "--enabled-only", + "-e", + "RULE", + "-d", + "OTHER", + "file.txt", + ] + ) + + def test_enabled_only_with_enable_passes(self) -> None: + """--enabled-only with --enable is accepted.""" + args = parse_args(["-l", "en-US", "--enabled-only", "-e", "RULE", "file.txt"]) + assert args.enabled_only is True + assert "RULE" in args.enable + + +class TestGetRemoteServer: + """Tests for the get_remote_server() URL builder.""" + + def _args(self, host: str | None = None, port: str | None = None) -> CliArgs: + """Build a minimal CliArgs with only remote_host/remote_port set.""" + args = CliArgs() + args.remote_host = host + args.remote_port = port + return args + + def test_no_host_returns_none(self) -> None: + """Returns None when no remote host is set.""" + assert get_remote_server(self._args()) is None + + def test_host_without_port(self) -> None: + """Returns the host name alone when no port is given.""" + assert get_remote_server(self._args(host="localhost")) == "localhost" + + def test_host_with_port(self) -> None: + """Returns host:port when both are provided.""" + result = get_remote_server(self._args(host="localhost", port="8081")) + assert result == "localhost:8081" + + +class TestPrintException: + """Tests for the print_exception() stderr printer.""" + + def test_without_debug_prints_to_stderr( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """Without debug=True, only the message is printed to stderr.""" + print_exception(ValueError("test error"), debug=False) + assert "test error" in capsys.readouterr().err + + def test_with_debug_prints_traceback( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """With debug=True, the full traceback is printed to stderr.""" + try: + msg = "original error" + raise ValueError(msg) + except ValueError: + print_exception(ValueError("current error"), debug=True) + captured = capsys.readouterr() + assert 
"ValueError" in captured.err + + +class TestGetText: + """Tests for the get_text() file reader.""" + + def test_reads_file(self, tmp_path: Path) -> None: + """File content is returned as-is when no ignore pattern is given.""" + f = tmp_path / "test.txt" + f.write_text("hello world\n", encoding="utf-8") + result = get_text(str(f), encoding="utf-8", ignore=None) + assert result == "hello world\n" + + def test_ignore_replaces_matching_lines(self, tmp_path: Path) -> None: + """Lines matching the ignore regex are replaced with a newline.""" + f = tmp_path / "test.txt" + f.write_text("keep this\n# skip this\nkeep too\n", encoding="utf-8") + result = get_text(str(f), encoding="utf-8", ignore=r"#.*") + assert "# skip this" not in result + assert "keep this" in result + assert "keep too" in result + + def test_no_ignore_keeps_all(self, tmp_path: Path) -> None: + """All lines are kept when no ignore pattern is set.""" + f = tmp_path / "test.txt" + f.write_text("line1\nline2\n", encoding="utf-8") + result = get_text(str(f), encoding=None, ignore=None) + assert result == "line1\nline2\n" + + +class TestGetInputText: + """Tests for the get_input_text() stdin/file dispatcher.""" + + def _args( + self, ignore_lines: str | None = None, encoding: str | None = None + ) -> CliArgs: + """Build a minimal CliArgs with only ignore_lines/encoding set.""" + args = CliArgs() + args.ignore_lines = ignore_lines + args.encoding = encoding + return args + + def test_reads_from_file(self, tmp_path: Path) -> None: + """Regular filename is read from disk.""" + f = tmp_path / "input.txt" + f.write_text("test content", encoding="utf-8") + result = get_input_text(str(f), self._args()) + assert result == "test content" + + def test_reads_from_stdin(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Filename '-' reads from stdin.""" + monkeypatch.setattr("sys.stdin", io.StringIO("stdin content")) + result = get_input_text("-", self._args()) + assert result == "stdin content" + + def 
test_stdin_with_ignore_lines(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Matching lines from stdin are suppressed when ignore_lines is set.""" + monkeypatch.setattr("sys.stdin", io.StringIO("keep\n# skip\nkeep2\n")) + result = get_input_text("-", self._args(ignore_lines=r"#.*")) + assert "# skip" not in result + assert "keep" in result + + def test_uses_encoding(self, tmp_path: Path) -> None: + """Non-UTF-8 files are decoded with the specified encoding.""" + f = tmp_path / "latin.txt" + content = "caf\xe9" + f.write_bytes(content.encode("latin-1")) + result = get_input_text(str(f), self._args(encoding="latin-1")) + assert "caf" in result + + +class TestReadProjectVersion: + """Tests for _read_project_version().""" + + def test_reads_version_from_pyproject(self) -> None: + """Version string is read from the project's pyproject.toml.""" + pyproject = Path(__file__).parent.parent.parent / "pyproject.toml" + version = _read_project_version(pyproject) + assert isinstance(version, str) + assert version.count(".") >= 1 diff --git a/tests/unit/test_config_unit.py b/tests/unit/test_config_unit.py new file mode 100644 index 0000000..2005a0d --- /dev/null +++ b/tests/unit/test_config_unit.py @@ -0,0 +1,235 @@ +"""Unit tests for config_file.py encoders, validators, and LanguageToolConfig.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from language_tool_python.config_file import ( + LanguageToolConfig, + _bool_encoder, + _comma_list_encoder, + _encode_config, + _int_encoder, + _is_lang_key, + _number_encoder, + _path_encoder, + _path_validator, +) +from language_tool_python.exceptions import PathError + + +class TestBoolEncoder: + """Tests for the _bool_encoder() function.""" + + def test_true(self) -> None: + """True is encoded as the string 'true'.""" + assert _bool_encoder(v=True) == "true" + + def test_false(self) -> None: + """False is encoded as the string 'false'.""" + assert _bool_encoder(v=False) == "false" + + def 
test_truthy_int(self) -> None: + """A truthy integer is encoded as 'true'.""" + assert _bool_encoder(1) == "true" + + def test_falsy_int(self) -> None: + """A falsy integer is encoded as 'false'.""" + assert _bool_encoder(0) == "false" + + +class TestIntEncoder: + """Tests for the _int_encoder() function.""" + + def test_positive(self) -> None: + """A positive integer is converted to its decimal string.""" + assert _int_encoder(42) == "42" + + def test_zero(self) -> None: + """Zero is converted to '0'.""" + assert _int_encoder(0) == "0" + + +class TestNumberEncoder: + """Tests for the _number_encoder() function.""" + + def test_integer(self) -> None: + """An integer value is rendered as a float string.""" + assert _number_encoder(5) == "5.0" + + def test_float(self) -> None: + """A float value is rendered with its decimal part.""" + assert _number_encoder(3.14) == "3.14" + + +class TestCommaListEncoder: + """Tests for the _comma_list_encoder() function.""" + + def test_string_passthrough(self) -> None: + """A plain string is returned unchanged.""" + assert _comma_list_encoder("a,b,c") == "a,b,c" + + def test_list_joined(self) -> None: + """A list of strings is joined with commas.""" + assert _comma_list_encoder(["a", "b", "c"]) == "a,b,c" + + def test_tuple_joined(self) -> None: + """A tuple of strings is joined with commas.""" + assert _comma_list_encoder(("x", "y")) == "x,y" + + def test_single_item(self) -> None: + """A single-element list returns the element without a comma.""" + assert _comma_list_encoder(["only"]) == "only" + + +class TestPathEncoder: + """Tests for the _path_encoder() function.""" + + def test_path_object(self, tmp_path: Path) -> None: + """A Path object is encoded to a string containing the path components.""" + result = _path_encoder(tmp_path / "model") + assert "model" in result + + def test_backslash_escaped(self) -> None: + """Windows backslashes in path strings are escaped or converted.""" + p = Path("C:\\Users\\test\\model") + result 
= _path_encoder(p) + assert "\\\\" in result or "/" in result + + +class TestPathValidator: + """Tests for the _path_validator() function.""" + + def test_existing_file(self, tmp_path: Path) -> None: + """An existing file path passes validation without error.""" + f = tmp_path / "file.txt" + f.write_text("content") + _path_validator(f) + + def test_existing_directory(self, tmp_path: Path) -> None: + """An existing directory path passes validation without error.""" + _path_validator(tmp_path) + + def test_nonexistent_raises(self, tmp_path: Path) -> None: + """A path that does not exist raises PathError.""" + with pytest.raises(PathError, match="does not exist"): + _path_validator(tmp_path / "nonexistent.txt") + + +class TestIsLangKey: + """Tests for the _is_lang_key() predicate.""" + + def test_lang_code_format(self) -> None: + """A key of the form 'lang-XX' is recognized as a language key.""" + assert _is_lang_key("lang-en") is True + + def test_lang_code_dict_path_format(self) -> None: + """A key of the form 'lang-XX-dictPath' is recognized as a language key.""" + assert _is_lang_key("lang-en-dictPath") is True + + def test_not_lang_prefix(self) -> None: + """A key without the 'lang-' prefix is not a language key.""" + assert _is_lang_key("cacheSize") is False + + def test_lang_only_no_code(self) -> None: + """'lang-' with no language code is not a valid language key.""" + assert _is_lang_key("lang-") is False + + def test_lang_too_many_parts(self) -> None: + """A key with more than three parts is not a valid language key.""" + assert _is_lang_key("lang-en-dictPath-extra") is False + + +class TestEncodeConfig: + """Tests for the _encode_config() dict encoder.""" + + def test_int_option(self) -> None: + """An integer option value is encoded as its decimal string.""" + result = _encode_config({"cacheSize": 1000}) + assert result == {"cacheSize": "1000"} + + def test_bool_option(self) -> None: + """A boolean option value is encoded as 'true' or 'false'.""" + result = 
_encode_config({"pipelineCaching": True}) + assert result == {"pipelineCaching": "true"} + + def test_number_option(self) -> None: + """A float option value is encoded as its float string.""" + result = _encode_config({"maxErrorsPerWordRate": 0.5}) + assert result == {"maxErrorsPerWordRate": "0.5"} + + def test_list_option(self) -> None: + """A list option value is encoded as a comma-separated string.""" + result = _encode_config({"blockedReferrers": ["a.com", "b.com"]}) + assert result == {"blockedReferrers": "a.com,b.com"} + + def test_lang_code_option(self) -> None: + """A language-code option is passed through without modification.""" + result = _encode_config({"lang-en": "custom-word"}) + assert result == {"lang-en": "custom-word"} + + def test_lang_dict_path_option(self, tmp_path: Path) -> None: + """A language dict-path option is accepted when the path exists.""" + result = _encode_config({"lang-en-dictPath": str(tmp_path)}) + assert "lang-en-dictPath" in result + + def test_unknown_key_raises(self) -> None: + """An unrecognized config key raises ValueError.""" + with pytest.raises(ValueError, match="unexpected key"): + _encode_config({"unknownKey": "value"}) + + def test_wrong_type_raises(self) -> None: + """A value of the wrong type for a known key raises TypeError.""" + with pytest.raises(TypeError, match="invalid type"): + _encode_config({"cacheSize": "not_an_int"}) + + def test_path_validator_called(self, tmp_path: Path) -> None: + """A path-type config option with a nonexistent path raises PathError.""" + nonexistent = tmp_path / "no_such_model" + with pytest.raises(PathError, match="does not exist"): + _encode_config({"languageModel": str(nonexistent)}) + + +class TestLanguageToolConfig: + """Tests for the LanguageToolConfig class.""" + + def test_empty_config_raises(self) -> None: + """Constructing with an empty dict raises ValueError.""" + with pytest.raises(ValueError, match="cannot be empty"): + LanguageToolConfig({}) + + def 
test_valid_config_creates_file(self) -> None: + """A valid config creates a temporary .properties file on disk.""" + cfg = LanguageToolConfig({"cacheSize": 500}) + assert cfg.path + assert Path(cfg.path).exists() + + def test_config_file_content(self) -> None: + """The .properties file contains the expected key=value pair.""" + cfg = LanguageToolConfig({"cacheSize": 500}) + content = Path(cfg.path).read_text(encoding="utf-8") + assert "cacheSize=500" in content + + def test_multiple_options(self) -> None: + """Multiple config options all appear in the .properties file.""" + cfg = LanguageToolConfig({"cacheSize": 100, "pipelineCaching": True}) + content = Path(cfg.path).read_text(encoding="utf-8") + assert "cacheSize=100" in content + assert "pipelineCaching=true" in content + + def test_config_dict_stored(self) -> None: + """The encoded config is stored on the .config attribute.""" + cfg = LanguageToolConfig({"cacheSize": 200}) + assert cfg.config == {"cacheSize": "200"} + + def test_boolean_config(self) -> None: + """A boolean config value is encoded as 'true' or 'false'.""" + cfg = LanguageToolConfig({"premiumOnly": False}) + assert cfg.config == {"premiumOnly": "false"} + + def test_list_config(self) -> None: + """A list config value is encoded as a comma-separated string.""" + cfg = LanguageToolConfig({"disabledRuleIds": ["RULE_A", "RULE_B"]}) + assert cfg.config["disabledRuleIds"] == "RULE_A,RULE_B" diff --git a/tests/unit/test_download_unit.py b/tests/unit/test_download_unit.py new file mode 100644 index 0000000..9111496 --- /dev/null +++ b/tests/unit/test_download_unit.py @@ -0,0 +1,305 @@ +"""Unit tests for download_lt.py helpers (no network, no Java required). + +Note: test_download.py calls importlib.reload(download_lt) which invalidates +static class imports. We access classes via the module object (updated in-place +by reload) to ensure isinstance checks work regardless of test ordering. 
+""" + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +import pytest + +import language_tool_python.download_lt as _dl +from language_tool_python.exceptions import PathError + +if TYPE_CHECKING: + from pathlib import Path + +_JAVA_8_MINOR = 8 +_JAVA_17_MAJOR = 17 +_JAVA_21_MAJOR = 21 +_SHA256_HEX_LENGTH = 64 +_KIBIBYTE = 1024 + + +def return_42(_: object) -> int: + """Return 42, used for monkeypatching.""" + return 42 + + +class TestLoadsManifest: + """Tests for the _loads_manifest() TOML parser.""" + + def test_valid_toml_returns_dict(self) -> None: + """Valid TOML input returns a dict.""" + result = _dl._loads_manifest('[hashes]\n"6.8" = "abc"\n') + assert isinstance(result, dict) + + def test_empty_toml(self) -> None: + """Empty TOML input returns an empty dict.""" + result = _dl._loads_manifest("") + assert result == {} + + +class TestLoadExpectedDownloadSha256: + """Tests for _load_expected_download_sha256().""" + + def test_valid_manifest(self) -> None: + """A well-formed hash entry is parsed to version → hash mapping.""" + sha = "a" * _SHA256_HEX_LENGTH + result = _dl._load_expected_download_sha256(f'"6.8" = "{sha}"\n') + assert result["6.8"] == sha + + def test_non_dict_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A manifest that does not parse to a dict raises PathError.""" + monkeypatch.setattr( + "language_tool_python.download_lt._loads_manifest", + return_42, + ) + with pytest.raises(PathError, match="expected a TOML table"): + _dl._load_expected_download_sha256("anything") + + def test_non_string_value_raises(self) -> None: + """A non-string hash value in the manifest raises PathError.""" + with pytest.raises(PathError, match="expected string keys and values"): + _dl._load_expected_download_sha256('"6.8" = 42\n') + + +class TestValidateDownloadSize: + """Tests for the _validate_download_size() Content-Length checker.""" + + def test_none_returns_none(self) -> None: + """None input (missing header) 
returns None.""" + assert _dl._validate_download_size(None) is None + + def test_valid_size(self) -> None: + """A numeric size string is converted to an int.""" + assert _dl._validate_download_size("1024") == _KIBIBYTE + + def test_zero_is_valid(self) -> None: + """Zero is a valid content-length.""" + assert _dl._validate_download_size("0") == 0 + + def test_invalid_string_raises(self) -> None: + """A non-numeric string raises PathError.""" + with pytest.raises(PathError, match="Invalid Content-Length"): + _dl._validate_download_size("notanumber") + + def test_negative_raises(self) -> None: + """A negative value raises PathError.""" + with pytest.raises(PathError, match="Invalid Content-Length"): + _dl._validate_download_size("-1") + + def test_too_large_raises(self) -> None: + """A size exceeding the maximum raises PathError.""" + with pytest.raises(PathError, match="Refusing to download"): + _dl._validate_download_size(str(512 * 1024 * 1024 + 1)) + + +class TestParseJavaVersion: + """Tests for _parse_java_version() version string parsing.""" + + def test_old_format_quoted(self) -> None: + """The old 'java version "1.8.0_N"' format is parsed to (1, 8).""" + text = 'java version "1.8.0_292"' + major, minor = _dl._parse_java_version(text) + assert major == 1 + assert minor == _JAVA_8_MINOR + + def test_new_format_17(self) -> None: + """The new 'openjdk N.M.P' format is parsed to (17, 0).""" + text = "openjdk 17.0.1 2021-10-19" + major, minor = _dl._parse_java_version(text) + assert major == _JAVA_17_MAJOR + assert minor == 0 + + def test_new_format_21(self) -> None: + """The new quoted 'openjdk version "21.0.2"' format is parsed to (21, ...).""" + text = 'openjdk version "21.0.2" 2024-01-16' + major, _ = _dl._parse_java_version(text) + assert major == _JAVA_21_MAJOR + + def test_unparseable_raises(self) -> None: + """A string that matches no known pattern causes SystemExit.""" + with pytest.raises(SystemExit, match="Could not parse"): + _dl._parse_java_version("not 
a java version string") + + def test_multiline_output(self) -> None: + """Multiline java -version output is parsed from the first line.""" + text = ( + 'openjdk version "21.0.2" 2024-01-16\n' + "OpenJDK Runtime Environment (build 21.0.2+13)\n" + "OpenJDK 64-Bit Server VM (build 21.0.2+13, mixed mode, sharing)\n" + ) + major, _ = _dl._parse_java_version(text) + assert major == _JAVA_21_MAJOR + + +class TestLocalLanguageToolFromVersionName: + """Tests for LocalLanguageTool.from_version_name() factory method.""" + + def test_release_version(self) -> None: + """An 'X.Y' string returns a ReleaseLocalLanguageTool instance.""" + lt = _dl.LocalLanguageTool.from_version_name("6.8") + assert isinstance(lt, _dl.ReleaseLocalLanguageTool) + + def test_snapshot_date_version(self) -> None: + """A 'YYYYMMDD' string returns a SnapshotLocalLanguageTool instance.""" + lt = _dl.LocalLanguageTool.from_version_name("20240101") + assert isinstance(lt, _dl.SnapshotLocalLanguageTool) + + def test_snapshot_latest(self) -> None: + """'latest' returns a SnapshotLocalLanguageTool instance.""" + lt = _dl.LocalLanguageTool.from_version_name("latest") + assert isinstance(lt, _dl.SnapshotLocalLanguageTool) + + def test_unknown_format_raises(self) -> None: + """An unrecognized version string raises ValueError.""" + with pytest.raises(ValueError, match="Unknown LanguageTool version"): + _dl.LocalLanguageTool.from_version_name("unknown-format") + + def test_default_version(self) -> None: + """Calling without arguments returns the default release version.""" + lt = _dl.LocalLanguageTool.from_version_name() + assert isinstance(lt, _dl.ReleaseLocalLanguageTool) + + +class TestLocalLanguageToolFromPath: + """Tests for LocalLanguageTool.from_path() directory-name parser.""" + + def test_valid_release_path(self, tmp_path: Path) -> None: + """A 'LanguageTool-X.Y' directory name returns a ReleaseLocalLanguageTool.""" + d = tmp_path / "LanguageTool-6.8" + lt = _dl.LocalLanguageTool.from_path(d) + assert 
isinstance(lt, _dl.ReleaseLocalLanguageTool) + + def test_valid_snapshot_path(self, tmp_path: Path) -> None: + """A 'LanguageTool-YYYYMMDD' directory returns a SnapshotLocalLanguageTool.""" + d = tmp_path / "LanguageTool-20240101" + lt = _dl.LocalLanguageTool.from_path(d) + assert isinstance(lt, _dl.SnapshotLocalLanguageTool) + + def test_invalid_path_raises(self, tmp_path: Path) -> None: + """A directory name without the expected pattern raises ValueError.""" + d = tmp_path / "not-a-lt-dir" + with pytest.raises(ValueError, match="Could not determine"): + _dl.LocalLanguageTool.from_path(d) + + +class TestReleaseLocalLanguageTool: + """Tests for ReleaseLocalLanguageTool attributes and ordering.""" + + def test_version_name(self) -> None: + """The version_name attribute reflects the version given at construction.""" + lt = _dl.ReleaseLocalLanguageTool("6.8") + assert lt.version_name == "6.8" + + def test_eq(self) -> None: + """Two instances with the same version are equal.""" + a = _dl.ReleaseLocalLanguageTool("6.8") + b = _dl.ReleaseLocalLanguageTool("6.8") + assert a == b + + def test_neq(self) -> None: + """Instances with different versions are not equal.""" + a = _dl.ReleaseLocalLanguageTool("6.8") + b = _dl.ReleaseLocalLanguageTool("6.7") + assert a != b + + def test_lt(self) -> None: + """An older version is less than a newer version.""" + old = _dl.ReleaseLocalLanguageTool("6.7") + new = _dl.ReleaseLocalLanguageTool("6.8") + assert old < new + + def test_hash(self) -> None: + """Equal instances produce the same hash.""" + a = _dl.ReleaseLocalLanguageTool("6.8") + b = _dl.ReleaseLocalLanguageTool("6.8") + assert hash(a) == hash(b) + + def test_in_set(self) -> None: + """Duplicate instances collapse to one element in a set.""" + s = {_dl.ReleaseLocalLanguageTool("6.8"), _dl.ReleaseLocalLanguageTool("6.8")} + assert len(s) == 1 + + def test_download_url_new_version(self) -> None: + """The download URL for a recent version contains the version string.""" + lt = 
_dl.ReleaseLocalLanguageTool("6.8") + assert "6.8" in lt.download_url + + def test_download_url_old_version_uses_archive(self) -> None: + """The download URL for an old version also contains the version string.""" + lt = _dl.ReleaseLocalLanguageTool("4.0") + assert "4.0" in lt.download_url + + +class TestSnapshotLocalLanguageTool: + """Tests for SnapshotLocalLanguageTool attributes and equality.""" + + def test_version_name_date(self) -> None: + """A date-format version name is stored as-is.""" + lt = _dl.SnapshotLocalLanguageTool("20240101") + assert lt.version_name == "20240101" + + def test_version_name_latest_expands_to_date(self) -> None: + """'latest' expands to an 8-digit date string.""" + lt = _dl.SnapshotLocalLanguageTool("latest") + assert re.match(r"^\d{8}$", lt.version_name) + + def test_eq(self) -> None: + """Two instances with the same date are equal.""" + a = _dl.SnapshotLocalLanguageTool("20240101") + b = _dl.SnapshotLocalLanguageTool("20240101") + assert a == b + + def test_neq(self) -> None: + """Instances with different dates are not equal.""" + a = _dl.SnapshotLocalLanguageTool("20240101") + b = _dl.SnapshotLocalLanguageTool("20240201") + assert a != b + + def test_hash(self) -> None: + """Equal instances produce the same hash.""" + a = _dl.SnapshotLocalLanguageTool("20240101") + b = _dl.SnapshotLocalLanguageTool("20240101") + assert hash(a) == hash(b) + + +class TestGetZipHash: + """Tests for _get_zip_hash() SHA-256 lookup.""" + + def test_bypass_env_returns_none_with_warning( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """LTP_BYPASS_VERIFIED_DOWNLOADS=true skips verification with a warning.""" + monkeypatch.setenv("LTP_BYPASS_VERIFIED_DOWNLOADS", "true") + with pytest.warns(RuntimeWarning, match="bypassed"): + result = _dl._get_zip_hash("6.8") + assert result is None + + def test_known_version_returns_hash(self) -> None: + """A version present in the integrity manifest returns a 64-char hex hash.""" + if not 
_dl._EXPECTED_DOWNLOAD_SHA256: + pytest.skip("No known hashes in manifest") + version_name = next(iter(_dl._EXPECTED_DOWNLOAD_SHA256)) + result = _dl._get_zip_hash(version_name) + assert result is not None + assert len(result) == _SHA256_HEX_LENGTH + + def test_unknown_version_returns_none( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A version absent from the manifest returns None.""" + monkeypatch.delenv("LTP_BYPASS_VERIFIED_DOWNLOADS", raising=False) + result = _dl._get_zip_hash("0.0") + assert result is None + + def test_invalid_hash_in_env_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + """An invalid SHA-256 value in LTP_DOWNLOAD_SHA256 raises PathError.""" + monkeypatch.setenv("LTP_DOWNLOAD_SHA256", "not-a-valid-sha256") + with pytest.raises(PathError, match="Invalid SHA-256"): + _dl._get_zip_hash("6.8") diff --git a/tests/unit/test_internals_utils.py b/tests/unit/test_internals_utils.py new file mode 100644 index 0000000..7a41cb3 --- /dev/null +++ b/tests/unit/test_internals_utils.py @@ -0,0 +1,224 @@ +"""Unit tests for language_tool_python._internals.utils.""" + +from __future__ import annotations + +import subprocess +import sys +import time +from typing import TYPE_CHECKING + +import psutil +import pytest + +from language_tool_python._internals.utils import ( + get_env_float, + get_env_int, + get_language_tool_download_path, + get_locale_language, + kill_process_force, + parse_url, + version_tuple, +) +from language_tool_python.exceptions import PathError + +if TYPE_CHECKING: + from pathlib import Path + +_DEFAULT_INT = 42 +_ENV_INT_VALUE = 100 +_DEFAULT_FLOAT = 1.5 + + +class TestParseUrl: + """Tests for parse_url() scheme normalisation.""" + + def test_full_url_unchanged(self) -> None: + """A complete http URL is returned as-is.""" + assert parse_url("http://localhost:8081") == "http://localhost:8081" + + def test_https_url_unchanged(self) -> None: + """A complete https URL is returned as-is.""" + assert 
parse_url("https://example.com") == "https://example.com" + + def test_adds_http_scheme(self) -> None: + """A host:port string without a scheme gets http:// prepended.""" + result = parse_url("localhost:8081") + assert result.startswith("http://") + assert "localhost" in result + + def test_canonical_form(self) -> None: + """An already-complete URL with trailing slash is returned unchanged.""" + assert parse_url("http://localhost:8081/") == "http://localhost:8081/" + + +class TestGetEnvInt: + """Tests for get_env_int() environment variable reader.""" + + def test_returns_default_when_absent(self, monkeypatch: pytest.MonkeyPatch) -> None: + """The default is returned when the variable is not set.""" + monkeypatch.delenv("TEST_INT_VAR", raising=False) + assert get_env_int("TEST_INT_VAR", _DEFAULT_INT) == _DEFAULT_INT + + def test_reads_valid_value(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A valid integer string in the environment is returned as an int.""" + monkeypatch.setenv("TEST_INT_VAR", str(_ENV_INT_VALUE)) + assert get_env_int("TEST_INT_VAR", 0) == _ENV_INT_VALUE + + def test_raises_on_non_integer(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A non-numeric string raises PathError.""" + monkeypatch.setenv("TEST_INT_VAR", "notanint") + with pytest.raises(PathError, match="Invalid integer"): + get_env_int("TEST_INT_VAR", 0) + + def test_raises_on_zero(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Zero is not a valid positive integer and raises PathError.""" + monkeypatch.setenv("TEST_INT_VAR", "0") + with pytest.raises(PathError, match="Invalid integer"): + get_env_int("TEST_INT_VAR", 0) + + def test_raises_on_negative(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A negative integer string raises PathError.""" + monkeypatch.setenv("TEST_INT_VAR", "-5") + with pytest.raises(PathError, match="Invalid integer"): + get_env_int("TEST_INT_VAR", 0) + + +class TestGetEnvFloat: + """Tests for get_env_float() environment variable reader.""" + 
+ def test_returns_default_when_absent(self, monkeypatch: pytest.MonkeyPatch) -> None: + """The default is returned when the variable is not set.""" + monkeypatch.delenv("TEST_FLOAT_VAR", raising=False) + assert get_env_float("TEST_FLOAT_VAR", _DEFAULT_FLOAT) == _DEFAULT_FLOAT + + def test_reads_valid_value(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A valid float string is returned as a float.""" + monkeypatch.setenv("TEST_FLOAT_VAR", "3.14") + assert get_env_float("TEST_FLOAT_VAR", 0.0) == pytest.approx(3.14) + + def test_raises_on_non_float(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A non-numeric string raises PathError.""" + monkeypatch.setenv("TEST_FLOAT_VAR", "notafloat") + with pytest.raises(PathError, match="Invalid float"): + get_env_float("TEST_FLOAT_VAR", 0.0) + + def test_raises_on_zero(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Zero is not a valid positive float and raises PathError.""" + monkeypatch.setenv("TEST_FLOAT_VAR", "0.0") + with pytest.raises(PathError, match="Invalid float"): + get_env_float("TEST_FLOAT_VAR", 1.0) + + def test_raises_on_negative(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A negative float string raises PathError.""" + monkeypatch.setenv("TEST_FLOAT_VAR", "-1.0") + with pytest.raises(PathError, match="Invalid float"): + get_env_float("TEST_FLOAT_VAR", 1.0) + + def test_raises_on_inf(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Infinity is not a valid positive float and raises PathError.""" + monkeypatch.setenv("TEST_FLOAT_VAR", "inf") + with pytest.raises(PathError, match="Invalid float"): + get_env_float("TEST_FLOAT_VAR", 1.0) + + +class TestGetLanguageToolDownloadPath: + """Tests for get_language_tool_download_path() path resolver.""" + + def test_returns_path( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + """The returned path exists and is a directory.""" + monkeypatch.setenv("LTP_PATH", str(tmp_path)) + path = get_language_tool_download_path() + assert 
path.exists() + assert path.is_dir() + + def test_creates_directory( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + """A non-existent directory under LTP_PATH is created on first use.""" + new_dir = tmp_path / "new_subdir" + monkeypatch.setenv("LTP_PATH", str(new_dir)) + path = get_language_tool_download_path() + assert path.exists() + + def test_default_path_in_home(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Without LTP_PATH, the default path contains 'language_tool_python'.""" + monkeypatch.delenv("LTP_PATH", raising=False) + path = get_language_tool_download_path() + assert "language_tool_python" in str(path) + + +class TestGetLocaleLanguage: + """Tests for get_locale_language() system locale lookup.""" + + def test_returns_string(self) -> None: + """The function returns a non-empty string.""" + result = get_locale_language() + assert isinstance(result, str) + assert len(result) > 0 + + +class TestKillProcessForce: + """Tests for kill_process_force() process terminator.""" + + def test_raises_when_no_args(self) -> None: + """Calling with neither pid nor proc raises ValueError.""" + with pytest.raises(ValueError, match="Must pass either pid or proc"): + kill_process_force() + + def test_kills_by_pid(self) -> None: + """A process is terminated when its pid is given.""" + proc = subprocess.Popen( + [sys.executable, "-c", "import time; time.sleep(60)"], + ) + kill_process_force(pid=proc.pid) + proc.wait(timeout=5) + + def test_kills_by_proc(self) -> None: + """A process is terminated when a psutil.Process object is given.""" + proc = subprocess.Popen( + [sys.executable, "-c", "import time; time.sleep(60)"], + ) + ps_proc = psutil.Process(proc.pid) + kill_process_force(proc=ps_proc) + proc.wait(timeout=5) + + def test_kills_process_with_children(self) -> None: + """A process and its children are all terminated.""" + parent = subprocess.Popen( + [ + sys.executable, + "-c", + ( + "import subprocess, sys, time; " + 
"subprocess.Popen([sys.executable, '-c', " + "'import time; time.sleep(60)']); " + "time.sleep(60)" + ), + ], + ) + time.sleep(0.3) + kill_process_force(pid=parent.pid) + parent.wait(timeout=10) + + def test_nonexistent_pid_is_silent(self) -> None: + """A nonexistent pid is silently ignored.""" + kill_process_force(pid=999999999) + + +class TestVersionTuple: + """Tests for version_tuple() version string parser.""" + + def test_parses_version(self) -> None: + """A 'X.Y' version string is parsed to a (X, Y) int tuple.""" + assert version_tuple("6.8") == (6, 8) + + def test_parses_version_with_zeros(self) -> None: + """A 'X.0' version string is parsed correctly.""" + assert version_tuple("4.0") == (4, 0) + + def test_raises_on_invalid_format(self) -> None: + """A version string without a dot raises ValueError.""" + with pytest.raises(ValueError, match="not enough values"): + version_tuple("invalid") diff --git a/tests/unit/test_language_tag.py b/tests/unit/test_language_tag.py new file mode 100644 index 0000000..7ac4d3e --- /dev/null +++ b/tests/unit/test_language_tag.py @@ -0,0 +1,168 @@ +"""Unit tests for LanguageTag normalization and comparison.""" + +import pytest + +from language_tool_python.language_tag import LanguageTag + +_LANGS = ["en-US", "en-GB", "en", "de-DE", "fr-FR", "pt-BR"] + +_SET_SIZE_TWO = 2 + + +def _tag(tag: str, languages: list[str] = _LANGS) -> LanguageTag: + """Construct a LanguageTag against _LANGS by default.""" + return LanguageTag(tag, languages) + + +class TestInit: + """Tests for basic LanguageTag initialization and normalization.""" + + def test_exact_match(self) -> None: + """An exact match in the language list is returned unchanged.""" + lt = _tag("en-US") + assert lt.normalized_tag == "en-US" + + def test_underscore_normalized_to_dash(self) -> None: + """Underscore locale separators are converted to dashes.""" + lt = _tag("en_US") + assert lt.normalized_tag == "en-US" + + def test_case_insensitive(self) -> None: + """Tag lookup is 
case-insensitive.""" + lt = _tag("EN-us") + assert lt.normalized_tag == "en-US" + + def test_tag_stored(self) -> None: + """The original (pre-normalization) tag is preserved.""" + lt = _tag("en-US") + assert lt.tag == "en-US" + + def test_languages_stored(self) -> None: + """The language list is accessible on the tag object.""" + lt = _tag("en-US") + assert "en-US" in lt.languages + + +class TestNormalizePosix: + """Tests for POSIX/C locale fallback behaviour.""" + + def test_c_locale_falls_back_to_en_us(self) -> None: + """'C' locale resolves to en-US when available.""" + lt = _tag("C") + assert lt.normalized_tag == "en-US" + + def test_posix_locale_falls_back_to_en_us(self) -> None: + """'POSIX' locale resolves to en-US when available.""" + lt = _tag("POSIX") + assert lt.normalized_tag == "en-US" + + def test_c_dot_variant(self) -> None: + """'C.UTF-8' resolves to en-US when available.""" + lt = _tag("C.UTF-8") + assert lt.normalized_tag == "en-US" + + def test_posix_prefers_en_gb_when_no_en_us(self) -> None: + """'C' locale falls back to en-GB when en-US is absent.""" + lt = LanguageTag("C", ["en-GB", "fr-FR"]) + assert lt.normalized_tag == "en-GB" + + def test_posix_falls_to_en_when_no_en_us_or_gb(self) -> None: + """'C' locale falls back to bare 'en' when no regional variant exists.""" + lt = LanguageTag("C", ["en", "fr-FR"]) + assert lt.normalized_tag == "en" + + def test_posix_raises_when_no_english(self) -> None: + """'C' locale raises ValueError when no English variant is available.""" + with pytest.raises(ValueError, match="unsupported language"): + LanguageTag("C", ["de-DE", "fr-FR"]) + + +class TestNormalizeFallback: + """Tests for regex-based region-stripping fallback.""" + + def test_language_only_matches_base(self) -> None: + """A bare language code matches the base language entry.""" + lt = _tag("en") + assert lt.normalized_tag == "en" + + def test_regex_fallback_to_base_language(self) -> None: + """An exact-match tag is returned as-is.""" + lt = 
_tag("pt-BR") + assert lt.normalized_tag == "pt-BR" + + def test_regex_fallback_strips_region(self) -> None: + """A tag with an unavailable region falls back to the base language.""" + lt = LanguageTag("en-AU", ["en", "de-DE"]) + assert lt.normalized_tag == "en" + + def test_empty_tag_raises(self) -> None: + """An empty tag string raises ValueError.""" + with pytest.raises(ValueError, match="empty language tag"): + _tag("") + + def test_unsupported_tag_raises(self) -> None: + """A tag with no match raises ValueError.""" + with pytest.raises(ValueError, match="unsupported language"): + _tag("zz-ZZ") + + def test_unmatched_pattern_raises(self) -> None: + """A non-language-like string raises ValueError.""" + with pytest.raises(ValueError, match="unsupported language"): + _tag("123invalid") + + +class TestComparisons: + """Tests for LanguageTag equality, ordering, and hashing.""" + + def test_eq_same_tag(self) -> None: + """Two tags with the same value are equal.""" + assert _tag("en-US") == _tag("en-US") + + def test_eq_with_string(self) -> None: + """A LanguageTag equals its normalized string.""" + assert _tag("en-US") == "en-US" + + def test_eq_not_equal(self) -> None: + """Tags with different values are not equal.""" + assert _tag("en-US") != _tag("de-DE") + + def test_eq_not_implemented_for_non_str(self) -> None: + """Comparing with a non-string returns NotImplemented.""" + assert _tag("en-US").__eq__(42) is NotImplemented + + def test_lt_ordering(self) -> None: + """Tags are ordered lexicographically by their normalized value.""" + assert _tag("de-DE") < _tag("en-US") + + def test_lt_not_implemented_for_non_str(self) -> None: + """Less-than comparison with a non-string returns NotImplemented.""" + assert _tag("en-US").__lt__(42) is NotImplemented + + def test_hash_equal_tags(self) -> None: + """Equal tags produce the same hash.""" + assert hash(_tag("en-US")) == hash(_tag("en-US")) + + def test_hash_different_tags(self) -> None: + """Different tags produce 
different hashes (high probability).""" + assert hash(_tag("en-US")) != hash(_tag("de-DE")) + + def test_in_set(self) -> None: + """Two distinct tags result in a two-element set.""" + s = {_tag("en-US"), _tag("de-DE")} + assert len(s) == _SET_SIZE_TWO + + +class TestStrRepr: + """Tests for LanguageTag string representations.""" + + def test_str_returns_normalized(self) -> None: + """str() returns the normalized tag.""" + assert str(_tag("en-US")) == "en-US" + + def test_repr_format(self) -> None: + """repr() uses the canonical angle-bracket format.""" + assert repr(_tag("en-US")) == '<LanguageTag "en-US">' + + def test_total_ordering_gt(self) -> None: + """Greater-than comparison works via total_ordering.""" + assert _tag("en-US") > _tag("de-DE") diff --git a/tests/unit/test_match.py b/tests/unit/test_match.py new file mode 100644 index 0000000..fdccbc8 --- /dev/null +++ b/tests/unit/test_match.py @@ -0,0 +1,421 @@ +"""Unit tests for the Match class and related helpers.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from language_tool_python.match import ( + Match, + _four_byte_char_positions, + _get_match_ordered_dict, + is_check_match, +) + +if TYPE_CHECKING: + from language_tool_python._internals.api_types import CheckMatch + +_DEFAULT_OFFSET = 8 +_DEFAULT_LENGTH = 4 +_DEFAULT_CONTEXT_OFFSET = 8 +_NUM_MATCH_FIELDS = 10 + + +def _make_attrib( # noqa: PLR0913 + *, + message: str = "Possible spelling mistake.", + short_message: str = "Spelling mistake", + replacements: list[str] | None = None, + offset: int = 8, + length: int = 4, + context_text: str = "This is noot okay.", + context_offset: int = 8, + sentence: str = "This is noot okay.", + rule_id: str = "MORFOLOGIK_RULE_EN_US", + rule_desc: str = "Possible spelling mistake", + issue_type: str = "misspelling", + category_id: str = "TYPOS", + category_name: str = "Possible Typo", +) -> CheckMatch: + repl_list: list[str] = replacements if replacements is not None else ["not", "noon"] +
return { + "message": message, + "shortMessage": short_message, + "replacements": [{"value": r} for r in repl_list], + "offset": offset, + "length": length, + "context": {"text": context_text, "offset": context_offset, "length": length}, + "sentence": sentence, + "type": {"typeName": "Other"}, + "rule": { + "id": rule_id, + "description": rule_desc, + "issueType": issue_type, + "category": {"id": category_id, "name": category_name}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + + +def _make_match(text: str = "This is noot okay.", **kwargs: object) -> Match: + return Match(_make_attrib(**kwargs), text) # type: ignore[arg-type] + + +class TestMatchInit: + """Tests for Match.__init__() attribute mapping.""" + + def test_basic_attributes(self) -> None: + """Default attributes are populated correctly from the attrib dict.""" + m = _make_match() + assert m.rule_id == "MORFOLOGIK_RULE_EN_US" + assert m.message == "Possible spelling mistake." + assert m.replacements == ["not", "noon"] + assert m.offset == _DEFAULT_OFFSET + assert m.error_length == _DEFAULT_LENGTH + assert m.category == "TYPOS" + assert m.rule_issue_type == "misspelling" + assert m.sentence == "This is noot okay." + + def test_context_attributes(self) -> None: + """Context text and offset are set from the nested context dict.""" + m = _make_match() + assert m.context == "This is noot okay." 
+ assert m.offset_in_context == _DEFAULT_CONTEXT_OFFSET + + def test_unicode_normalization(self) -> None: + """Message text is NFKC-normalized on construction.""" + # "ﬁ" (U+FB01 LATIN SMALL LIGATURE FI) → "fi" + m = _make_match(message="ﬁnd the error") + assert m.message == "find the error" + + def test_four_byte_char_adjustment(self) -> None: + """A 4-byte emoji before the match shifts the Python offset by 1.""" + # "🌅" at position 0 is 1 Python char but 2 Java chars + # Java offset 3 → Python offset 2 ("🌅 he" → 'h' is at index 2) + text = "🌅 hello world" + attrib = _make_attrib( + offset=3, + length=5, + context_text="🌅 hello world", + context_offset=3, + sentence="🌅 hello world", + ) + m = Match(attrib, text) + adjusted_offset = 2 + assert m.offset == adjusted_offset + + def test_no_adjustment_without_four_byte_chars(self) -> None: + """Offsets are unchanged when no 4-byte characters precede the match.""" + text = "Hello world today" + expected_offset = 6 + m = Match( + _make_attrib( + offset=expected_offset, + length=5, + context_text=text, + context_offset=expected_offset, + sentence=text, + ), + text, + ) + assert m.offset == expected_offset + + def test_same_text_reuses_cache(self) -> None: + """Two matches on the same text share the cached position list.""" + text = "Same text here."
+ explicit_offset = 5 + m1 = Match(_make_attrib(context_text=text, sentence=text), text) + m2 = Match( + _make_attrib( + context_text=text, + sentence=text, + offset=explicit_offset, + length=_DEFAULT_LENGTH, + context_offset=explicit_offset, + ), + text, + ) + assert text == Match.PREVIOUS_MATCHES_TEXT + assert m1.offset == _DEFAULT_OFFSET + assert m2.offset == explicit_offset + + +class TestFourByteCharPositions: + """Tests for _four_byte_char_positions() helper.""" + + def test_empty_string(self) -> None: + """An empty string has no 4-byte char positions.""" + assert _four_byte_char_positions("") == [] + + def test_ascii_only(self) -> None: + """A pure-ASCII string has no 4-byte char positions.""" + assert _four_byte_char_positions("hello") == [] + + def test_emoji_at_start(self) -> None: + """An emoji at position 0 is reported at index 0.""" + assert _four_byte_char_positions("🌅abc") == [0] + + def test_multiple_emojis(self) -> None: + """Two consecutive emojis are reported at their UTF-16 code-unit indices.""" + positions = _four_byte_char_positions("🌅🎉abc") + assert positions == [0, 2] + + def test_emoji_in_middle(self) -> None: + """An emoji in the middle of ASCII text is reported at the correct index.""" + positions = _four_byte_char_positions("ab🌅cd") + assert positions == [2] + + +class TestMatchOrderedDict: + """Tests for _get_match_ordered_dict() field-type registry.""" + + def test_returns_all_keys(self) -> None: + """All expected field names are returned in order.""" + d = _get_match_ordered_dict() + expected_keys = [ + "rule_id", + "message", + "replacements", + "offset_in_context", + "context", + "offset", + "error_length", + "category", + "rule_issue_type", + "sentence", + ] + assert list(d.keys()) == expected_keys + + def test_value_types(self) -> None: + """Field types are the expected Python built-ins.""" + d = _get_match_ordered_dict() + assert d["offset"] is int + assert d["rule_id"] is str + assert d["replacements"] is list + + +class TestIsCheckMatch:
+ """Tests for the is_check_match() type-guard.""" + + def test_valid_check_match(self) -> None: + """A fully populated attrib dict is recognised as a CheckMatch.""" + assert is_check_match(_make_attrib()) + + def test_not_dict(self) -> None: + """Non-dict values are rejected.""" + assert not is_check_match("not a dict") + assert not is_check_match(None) + assert not is_check_match(42) + + def test_missing_field(self) -> None: + """A dict missing a required field is rejected.""" + attrib = dict(_make_attrib()) + del attrib["message"] + assert not is_check_match(attrib) + + def test_wrong_type(self) -> None: + """A dict with a field of the wrong type is rejected.""" + attrib = dict(_make_attrib()) + attrib["offset"] = "not_an_int" + assert not is_check_match(attrib) + + +class TestMatchStr: + """Tests for Match.__str__() human-readable formatter.""" + + def test_str_contains_rule_id(self) -> None: + """The rule ID is present in the string representation.""" + m = _make_match() + s = str(m) + assert "MORFOLOGIK_RULE_EN_US" in s + + def test_str_contains_message(self) -> None: + """The error message is present in the string representation.""" + m = _make_match() + assert "Possible spelling mistake" in str(m) + + def test_str_contains_suggestions(self) -> None: + """Replacement suggestions are present in the string representation.""" + m = _make_match() + assert "not" in str(m) + + def test_str_no_message_skips_message_line(self) -> None: + """A match with no message omits the Message line.""" + m = _make_match(message="") + assert "Message" not in str(m) + + def test_str_no_replacements_skips_suggestion(self) -> None: + """A match with no replacements omits the Suggestion line.""" + m = _make_match(replacements=[]) + assert "Suggestion" not in str(m) + + +class TestMatchRepr: + """Tests for Match.__repr__() machine-readable formatter.""" + + def test_repr_contains_class_name(self) -> None: + """The class name 'Match(' appears in the repr.""" + m = _make_match() + 
assert "Match(" in repr(m) + + def test_repr_contains_rule_id(self) -> None: + """The rule ID appears in the repr.""" + m = _make_match() + assert "MORFOLOGIK_RULE_EN_US" in repr(m) + + +class TestMatchedText: + """Tests for the matched_text property.""" + + def test_matched_text_extracts_correctly(self) -> None: + """matched_text returns the exact text slice at offset/length.""" + m = _make_match() + assert m.matched_text == "noot" + + +class TestGetLineAndColumn: + """Tests for Match.get_line_and_column().""" + + def test_single_line(self) -> None: + """A single-line text returns line 1 and a positive column.""" + text = "This is noot okay." + m = _make_match(text=text) + line, col = m.get_line_and_column(text) + assert line == 1 + assert col > 0 + + def test_context_not_in_text_raises(self) -> None: + """Passing unrelated text raises ValueError.""" + m = _make_match() + with pytest.raises(ValueError, match="does not match the context"): + m.get_line_and_column("completely different text here blah blah") + + +class TestSelectReplacement: + """Tests for Match.select_replacement() replacement narrower.""" + + def test_select_valid_index(self) -> None: + """Selecting index 1 keeps only the second replacement.""" + m = _make_match() + m.select_replacement(1) + assert m.replacements == ["noon"] + + def test_select_first(self) -> None: + """Selecting index 0 keeps only the first replacement.""" + m = _make_match() + m.select_replacement(0) + assert m.replacements == ["not"] + + def test_negative_index_raises(self) -> None: + """A negative index raises ValueError.""" + m = _make_match() + with pytest.raises(ValueError, match="numbered from 0"): + m.select_replacement(-1) + + def test_out_of_bounds_raises(self) -> None: + """An out-of-range index raises ValueError.""" + m = _make_match() + with pytest.raises(ValueError, match="numbered from 0"): + m.select_replacement(99) + + def test_no_replacements_raises(self) -> None: + """Selecting when there are no replacements 
raises ValueError.""" + m = _make_match(replacements=[]) + with pytest.raises(ValueError, match="no suggestions"): + m.select_replacement(0) + + +class TestMatchComparisons: + """Tests for Match equality, ordering, and NotImplemented handling.""" + + def test_eq_equal_matches(self) -> None: + """Two matches built from the same attrib dict are equal.""" + m1 = _make_match() + m2 = _make_match() + assert m1 == m2 + + def test_eq_different_offset(self) -> None: + """Matches with different offsets are not equal.""" + m1 = _make_match() + m2 = _make_match(offset=0, context_offset=0) + assert m1 != m2 + + def test_eq_not_implemented_for_non_match(self) -> None: + """Comparing a Match with a non-Match returns NotImplemented.""" + m = _make_match() + assert m.__eq__("not a match") is NotImplemented + + def test_lt(self) -> None: + """A match at an earlier offset is less than one at a later offset.""" + text = "This is noot okay, and also baaad." + m_early = Match( + _make_attrib( + offset=0, + length=_DEFAULT_LENGTH, + context_text=text, + context_offset=0, + sentence=text, + ), + text, + ) + m_later = Match( + _make_attrib( + offset=_DEFAULT_OFFSET, + length=_DEFAULT_LENGTH, + context_text=text, + context_offset=_DEFAULT_OFFSET, + sentence=text, + ), + text, + ) + assert m_early < m_later + + def test_lt_not_implemented_for_non_match(self) -> None: + """Less-than comparison with a non-Match returns NotImplemented.""" + m = _make_match() + assert m.__lt__("not a match") is NotImplemented + + +class TestMatchIter: + """Tests for Match.__iter__() field-value iterator.""" + + def test_iter_yields_all_values(self) -> None: + """Iterating a match yields exactly _NUM_MATCH_FIELDS values.""" + m = _make_match() + values = list(m) + assert len(values) == _NUM_MATCH_FIELDS + + def test_iter_first_is_rule_id(self) -> None: + """The first value yielded by the iterator is the rule_id.""" + m = _make_match() + assert next(iter(m)) == "MORFOLOGIK_RULE_EN_US" + + +class TestMatchSetAttr: 
+ """Tests for Match.__setattr__() type-coercing setter.""" + + def test_setattr_known_key_coerces_type(self) -> None: + """Setting a known field with a string coerces it to the declared type.""" + m = _make_match() + new_offset = 5 + m.offset = "5" # type: ignore[assignment] + assert m.offset == new_offset + assert isinstance(m.offset, int) + + def test_setattr_unknown_key_is_ignored(self) -> None: + """Setting an unknown field is silently ignored.""" + m = _make_match() + m.__setattr__("nonexistent_key", "value") + assert not hasattr(m, "nonexistent_key") + + +class TestMatchGetAttr: + """Tests for Match.__getattr__() unknown-attribute guard.""" + + def test_getattr_unknown_key_raises(self) -> None: + """Accessing an unknown attribute raises AttributeError.""" + m = _make_match() + with pytest.raises(AttributeError, match="no attribute"): + _ = m.completely_unknown diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 0000000..aa44343 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,218 @@ +"""Unit tests for language_tool_python.utils (classify_matches, correct).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from language_tool_python.match import Match +from language_tool_python.utils import TextStatus, classify_matches, correct + +if TYPE_CHECKING: + from language_tool_python._internals.api_types import CheckMatch + + +def _make_match( + rule_id: str = "RULE", + offset: int = 0, + length: int = 4, + replacements: list[str] | None = None, +) -> Match: + attrib: CheckMatch = { + "message": "Error", + "shortMessage": "", + "replacements": [{"value": r} for r in (replacements or [])], + "offset": offset, + "length": length, + "context": {"text": "text here.", "offset": offset, "length": length}, + "sentence": "text here.", + "type": {"typeName": "Other"}, + "rule": { + "id": rule_id, + "description": "desc", + "issueType": "misspelling", + "category": {"id": "TYPOS", 
"name": "Typos"}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + return Match(attrib, "text here.") + + +class TestClassifyMatches: + """Tests for classify_matches() match-set status classifier.""" + + def test_no_matches_returns_correct(self) -> None: + """An empty match list is classified as CORRECT.""" + assert classify_matches([]) == TextStatus.CORRECT + + def test_matches_with_replacements_returns_faulty(self) -> None: + """A match that has a replacement is classified as FAULTY.""" + m = _make_match(replacements=["fix"]) + assert classify_matches([m]) == TextStatus.FAULTY + + def test_matches_without_replacements_returns_garbage(self) -> None: + """A match without any replacement is classified as GARBAGE.""" + m = _make_match(replacements=[]) + assert classify_matches([m]) == TextStatus.GARBAGE + + def test_mixed_filters_to_faulty(self) -> None: + """A mix of matches with and without replacements is classified as FAULTY.""" + m_with = _make_match(replacements=["fix"]) + m_without = _make_match(replacements=[]) + assert classify_matches([m_with, m_without]) == TextStatus.FAULTY + + def test_all_without_replacements_is_garbage(self) -> None: + """Multiple matches all lacking replacements are classified as GARBAGE.""" + matches = [_make_match(replacements=[]) for _ in range(3)] + assert classify_matches(matches) == TextStatus.GARBAGE + + +class TestCorrect: + """Tests for correct() auto-correction function.""" + + def test_no_matches_returns_unchanged(self) -> None: + """Text with no matches is returned unchanged.""" + assert correct("hello world", []) == "hello world" + + def test_single_correction(self) -> None: + """A single match with a replacement is applied to the text.""" + m = _make_match(offset=0, length=4, replacements=["text"]) + result = correct("text here.", [m]) + assert result == "text here." 
+ + def test_correction_replaces_error(self) -> None: + """A misspelled word is replaced by the first suggested correction.""" + text = "Helo world" + attrib: CheckMatch = { + "message": "Misspelling", + "shortMessage": "", + "replacements": [{"value": "Hello"}], + "offset": 0, + "length": 4, + "context": {"text": text, "offset": 0, "length": 4}, + "sentence": text, + "type": {"typeName": "Other"}, + "rule": { + "id": "SPELL", + "description": "Spelling", + "issueType": "misspelling", + "category": {"id": "TYPOS", "name": "Typos"}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + m = Match(attrib, text) + result = correct(text, [m]) + assert result == "Hello world" + + def test_match_without_replacement_is_skipped(self) -> None: + """A match with no replacement leaves the text unchanged.""" + m = _make_match(offset=0, length=4, replacements=[]) + assert correct("text here.", [m]) == "text here." + + def test_overlapping_match_skips_mismatched_error(self) -> None: + """The second of two overlapping matches is skipped when offset drifts.""" + # First match replaces "aa" (offset 0, len 2) with "xxxxxx" (longer). + # Second match overlaps at offset 1, len 2 ("ab"). After the first + # replacement expands the text, the second match's expected text no + # longer sits at the right position → continue branch is hit. 
+ text = "aabbc" + attrib1: CheckMatch = { + "message": "e", + "shortMessage": "", + "replacements": [{"value": "xxxxxx"}], + "offset": 0, + "length": 2, + "context": {"text": text, "offset": 0, "length": 2}, + "sentence": text, + "type": {"typeName": "Other"}, + "rule": { + "id": "R", + "description": "d", + "issueType": "misspelling", + "category": {"id": "C", "name": "C"}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + attrib2: CheckMatch = { + "message": "e", + "shortMessage": "", + "replacements": [{"value": "y"}], + "offset": 1, + "length": 2, + "context": {"text": text, "offset": 1, "length": 2}, + "sentence": text, + "type": {"typeName": "Other"}, + "rule": { + "id": "R", + "description": "d", + "issueType": "misspelling", + "category": {"id": "C", "name": "C"}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + m1 = Match(attrib1, text) + m2 = Match(attrib2, text) + result = correct(text, [m1, m2]) + assert result == "xxxxxxbbc" + + def test_correct_adjusts_offset_for_length_change(self) -> None: + """A length-changing replacement shifts the offset for subsequent matches.""" + text = "A b c" + attrib1: CheckMatch = { + "message": "err", + "shortMessage": "", + "replacements": [{"value": "AAA"}], + "offset": 0, + "length": 1, + "context": {"text": text, "offset": 0, "length": 1}, + "sentence": text, + "type": {"typeName": "Other"}, + "rule": { + "id": "R", + "description": "d", + "issueType": "misspelling", + "category": {"id": "C", "name": "C"}, + }, + "ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + attrib2: CheckMatch = { + "message": "err", + "shortMessage": "", + "replacements": [{"value": "BBB"}], + "offset": 2, + "length": 1, + "context": {"text": text, "offset": 2, "length": 1}, + "sentence": text, + "type": {"typeName": "Other"}, + "rule": { + "id": "R", + "description": "d", + "issueType": "misspelling", + "category": {"id": "C", "name": "C"}, + }, + 
"ignoreForIncompleteSentence": False, + "contextForSureMatch": 0, + } + m1 = Match(attrib1, text) + m2 = Match(attrib2, text) + result = correct(text, [m1, m2]) + assert result == "AAA BBB c" + + +@pytest.mark.parametrize( + ("status", "value"), + [ + (TextStatus.CORRECT, "correct"), + (TextStatus.FAULTY, "faulty"), + (TextStatus.GARBAGE, "garbage"), + ], +) +def test_text_status_values(status: TextStatus, value: str) -> None: + """TextStatus enum values match expected strings.""" + assert status.value == value