From facb79b404138eb194b67d5fa111c13e62cf4863 Mon Sep 17 00:00:00 2001
From: mdevolde <martin.devolder2@gmail.com>
Date: Sat, 27 Jun 2026 12:39:17 +0200
Subject: [PATCH 1/2] test: split tests in categs, add bench tests

---
 pyproject.toml                               |  25 +++-
 pytest.ini                                   |   5 +
 tests/benchmarks/__init__.py                 |   1 +
 tests/benchmarks/conftest.py                 |  17 +++
 tests/benchmarks/test_bench_check.py         |  81 ++++++++++++
 tests/integration/__init__.py                |   1 +
 tests/integration/conftest.py                |  17 +++
 tests/{ => integration}/test_api_public.py   |   8 +-
 tests/{ => integration}/test_cli.py          | 127 +++++--------------
 tests/{ => integration}/test_config.py       |  35 +----
 tests/integration/test_download.py           |  94 ++++++++++++++
 tests/{ => integration}/test_match.py        |  10 +-
 tests/{ => integration}/test_server_local.py |   2 +-
 tests/property/__init__.py                   |   1 +
 tests/property/conftest.py                   |  17 +++
 tests/property/test_prop_config.py           |  85 +++++++++++++
 tests/property/test_prop_safe_zip.py         |  74 +++++++++++
 tests/property/test_prop_utils.py            |  20 +++
 tests/unit/__init__.py                       |   1 +
 tests/unit/conftest.py                       |  17 +++
 tests/unit/test_cli_args.py                  |  60 +++++++++
 tests/unit/test_config_validation.py         |  37 ++++++
 tests/{ => unit}/test_download.py            | 101 +--------------
 tests/{ => unit}/test_safe_zip.py            |   2 +-
 uv.lock                                      |  48 +++++++
 25 files changed, 650 insertions(+), 236 deletions(-)
 create mode 100644 tests/benchmarks/__init__.py
 create mode 100644 tests/benchmarks/conftest.py
 create mode 100644 tests/benchmarks/test_bench_check.py
 create mode 100644 tests/integration/__init__.py
 create mode 100644 tests/integration/conftest.py
 rename tests/{ => integration}/test_api_public.py (93%)
 rename tests/{ => integration}/test_cli.py (68%)
 rename tests/{ => integration}/test_config.py (81%)
 create mode 100644 tests/integration/test_download.py
 rename tests/{ => integration}/test_match.py (95%)
 rename tests/{ => integration}/test_server_local.py (98%)
 create mode 100644 tests/property/__init__.py
 create mode 100644 tests/property/conftest.py
 create mode 100644 tests/property/test_prop_config.py
 create mode 100644 tests/property/test_prop_safe_zip.py
 create mode 100644 tests/property/test_prop_utils.py
 create mode 100644 tests/unit/__init__.py
 create mode 100644 tests/unit/conftest.py
 create mode 100644 tests/unit/test_cli_args.py
 create mode 100644 tests/unit/test_config_validation.py
 rename tests/{ => unit}/test_download.py (82%)
 rename tests/{ => unit}/test_safe_zip.py (99%)

diff --git a/pyproject.toml b/pyproject.toml
index 47957ec..182c0cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,9 @@ changelog = "https://github.com/jxmorris12/language_tool_python/blob/master/CHAN
 [dependency-groups]
 tests = [
     "pytest",
+    "pytest-benchmark",
     "pytest-cov",
+    "hypothesis",
 ]
 
 docs = [
@@ -141,9 +143,10 @@ ignore = [
 ]
 
 [tool.ruff.lint.per-file-ignores]
-"tests/*.py" = [
-    "S101",  # Need to use assert statements in tests
-    "SLF001" # Need to use private members of the library for testing
+"tests/**/*.py" = [
+    "S101",   # Need to use assert statements in tests
+    "SLF001", # Need to use private members of the library for testing
+    "RUF001", # LanguageTool output contains typographic quotes (‘’“”)
 ]
 "src/language_tool_python/__main__.py" = ["T201"] # Allow usage of print in the CLI entry point
 
@@ -170,3 +173,19 @@ warn_return_any = true
 warn_unreachable = true
 warn_unused_configs = true
 warn_unused_ignores = true
+
+[[tool.mypy.overrides]]
+module = ["tests.benchmarks.*"]
+# pytest-benchmark is untyped; relax Any restrictions for benchmark files only
+disallow_any_unimported = false
+disallow_any_expr = false
+disallow_any_explicit = false
+disallow_any_decorated = false
+
+[[tool.mypy.overrides]]
+module = ["tests.property.*"]
+# hypothesis is untyped; relax Any restrictions for property test files only
+disallow_any_unimported = false
+disallow_any_expr = false
+disallow_any_explicit = false
+disallow_any_decorated = false
diff --git a/pytest.ini b/pytest.ini
index 8170a2f..2da9216 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,11 @@
 [pytest]
 addopts = -vra --cov=src --cov-report=html --cov-report=xml
 testpaths = tests
+markers =
+    unit: fast, isolated tests with no external dependencies
+    integration: tests that require a live LanguageTool server or network
+    property: property-based tests using Hypothesis
+    perf: performance benchmark tests using pytest-benchmark
 
 [coverage:run]
 source = src
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
new file mode 100644
index 0000000..1dff63c
--- /dev/null
+++ b/tests/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Benchmark tests for the language_tool_python library."""
diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py
new file mode 100644
index 0000000..6cebb40
--- /dev/null
+++ b/tests/benchmarks/conftest.py
@@ -0,0 +1,17 @@
+"""Configuration for the benchmark test suite."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+
+def pytest_collection_modifyitems(
+    items: list[pytest.Item],
+) -> None:
+    """Apply the 'perf' marker to all tests collected from this directory."""
+    benchmarks_dir = Path(__file__).parent
+    for item in items:
+        if item.path.is_relative_to(benchmarks_dir):
+            item.add_marker(pytest.mark.perf)
diff --git a/tests/benchmarks/test_bench_check.py b/tests/benchmarks/test_bench_check.py
new file mode 100644
index 0000000..2cca935
--- /dev/null
+++ b/tests/benchmarks/test_bench_check.py
@@ -0,0 +1,81 @@
+"""Benchmark tests for LanguageTool grammar checking performance.
+
+Run with: pytest tests/benchmarks/ -v
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+import language_tool_python
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from pytest_benchmark.fixture import BenchmarkFixture
+
+_SHORT_TEXT = "This is a sentence with some erors in it. "
+_MEDIUM_TEXT = (_SHORT_TEXT * 20).strip()
+_LONG_TEXT = (_SHORT_TEXT * 100).strip()
+
+
+@pytest.fixture(scope="module")
+def tool() -> Generator[language_tool_python.LanguageTool, None, None]:
+    """Provide a LanguageTool instance shared across benchmarks in this module."""
+    with language_tool_python.LanguageTool("en-US") as t:
+        yield t
+
+
+@pytest.fixture(scope="module")
+def cached_tool() -> Generator[language_tool_python.LanguageTool, None, None]:
+    """Provide a pipeline-caching LanguageTool instance for cache benchmarks."""
+    with language_tool_python.LanguageTool(
+        "en-US",
+        config={"cacheSize": 1000, "pipelineCaching": True},
+    ) as t:
+        yield t
+
+
+def test_bench_check_short_text(
+    benchmark: BenchmarkFixture,
+    tool: language_tool_python.LanguageTool,
+) -> None:
+    """Benchmark grammar checking on a short sentence (~42 characters)."""
+    benchmark(tool.check, _SHORT_TEXT)
+
+
+def test_bench_check_medium_text(
+    benchmark: BenchmarkFixture,
+    tool: language_tool_python.LanguageTool,
+) -> None:
+    """Benchmark grammar checking on medium-length text (~840 characters)."""
+    benchmark(tool.check, _MEDIUM_TEXT)
+
+
+def test_bench_check_long_text(
+    benchmark: BenchmarkFixture,
+    tool: language_tool_python.LanguageTool,
+) -> None:
+    """Benchmark grammar checking on long text (~4200 characters)."""
+    benchmark(tool.check, _LONG_TEXT)
+
+
+def test_bench_correct_short_text(
+    benchmark: BenchmarkFixture,
+    tool: language_tool_python.LanguageTool,
+) -> None:
+    """Benchmark automatic text correction on a short sentence."""
+    benchmark(tool.correct, _SHORT_TEXT)
+
+
+def test_bench_check_with_pipeline_cache(
+    benchmark: BenchmarkFixture,
+    cached_tool: language_tool_python.LanguageTool,
+) -> None:
+    """Benchmark grammar checking with pipeline caching enabled.
+
+    Compare with test_bench_check_short_text to measure cache speedup.
+    """
+    benchmark(cached_tool.check, _SHORT_TEXT)
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..7ac8aa9
--- /dev/null
+++ b/tests/integration/__init__.py
@@ -0,0 +1 @@
+"""Integration tests for the language_tool_python library."""
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
new file mode 100644
index 0000000..3628ef0
--- /dev/null
+++ b/tests/integration/conftest.py
@@ -0,0 +1,17 @@
+"""Configuration for the integration test suite."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+
+def pytest_collection_modifyitems(
+    items: list[pytest.Item],
+) -> None:
+    """Apply the 'integration' marker to all tests collected from this directory."""
+    integration_dir = Path(__file__).parent
+    for item in items:
+        if item.path.is_relative_to(integration_dir):
+            item.add_marker(pytest.mark.integration)
diff --git a/tests/test_api_public.py b/tests/integration/test_api_public.py
similarity index 93%
rename from tests/test_api_public.py
rename to tests/integration/test_api_public.py
index 1788741..307c5d6 100644
--- a/tests/test_api_public.py
+++ b/tests/integration/test_api_public.py
@@ -1,4 +1,4 @@
-"""Tests for the public API functionality."""
+"""Integration tests for the public API functionality."""
 
 import pytest
 
@@ -34,7 +34,7 @@ def test_remote_es() -> None:
                    'INCORRECT_EXPRESSIONS', 'rule_issue_type': 'grammar', 'sentence':
                    'LanguageTool le ayudará a afrentar algunas dificultades propias de
                    la escritura.'}), Match({'rule_id': 'PRON_HABER_PARTICIPIO',
-                   'message': 'El v. \u2018haber\u2019 se escribe con hache.',
+                   'message': 'El v. ‘haber’ se escribe con hache.',
                    'replacements': ['ha'], 'offset_in_context': 43, 'context':
                    '...ificultades propias de la escritura. Se a hecho un esfuerzo para
                    detectar errores...', 'offset': 107, 'error_length': 1, 'category':
@@ -50,8 +50,8 @@ def test_remote_es() -> None:
                    'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar
                    errores tipográficos, ortograficos y incluso gramaticales.'}),
                    Match({'rule_id': 'Y_E_O_U', 'message': 'Cuando precede a palabras
-                   que comienzan por \u2018i\u2019, la conjunción \u2018y\u2019 se
-                   transforma en \u2018e\u2019.', 'replacements': ['e'],
+                   que comienzan por ‘i’, la conjunción ‘y’ se
+                   transforma en ‘e’.', 'replacements': ['e'],
                    'offset_in_context': 43, 'context': '...ctar errores tipográficos,
                    ortograficos y incluso gramaticales. También algunos e...', 'offset':
                    176, 'error_length': 1, 'category': 'GRAMMAR', 'rule_issue_type':
diff --git a/tests/test_cli.py b/tests/integration/test_cli.py
similarity index 68%
rename from tests/test_cli.py
rename to tests/integration/test_cli.py
index 18da908..d101f66 100644
--- a/tests/test_cli.py
+++ b/tests/integration/test_cli.py
@@ -1,4 +1,4 @@
-"""Tests for the command-line interface (CLI) functionality."""
+"""Integration tests for the CLI using real LanguageTool server instances."""
 
 import io
 import sys
@@ -7,7 +7,39 @@
 import pytest
 
 import language_tool_python
-from language_tool_python.__main__ import main, parse_args
+from language_tool_python.__main__ import main
+
+
+def main_with_stdin(argv: list[str], stdin: str) -> int:
+    """Execute the main CLI with simulated stdin input.
+
+    :param argv: Command-line arguments to pass to the main function.
+    :param stdin: Input text to simulate as stdin.
+    :return: Exit code returned by the main function.
+    :rtype: int
+    """
+    old_stdin = sys.stdin
+    sys.stdin = io.StringIO(stdin)
+    try:
+        return main(argv)
+    finally:
+        sys.stdin = old_stdin
+
+
+@pytest.fixture(scope="module")
+def remote_server() -> Generator[tuple[str, int], None, None]:
+    """Fixture that provides a remote LanguageTool server for testing.
+
+    This fixture initializes a LanguageTool instance and yields its host and port,
+    ensuring proper cleanup after all tests in the module complete.
+
+    :return: A tuple containing the server host and port (host, port).
+    :rtype: Generator[Tuple[str, int], None, None]
+    """
+    with language_tool_python.LanguageTool("en-US") as tool:
+        host = tool._host
+        port = tool._port
+        yield host, port
 
 
 @pytest.mark.parametrize(
@@ -89,22 +121,6 @@ def test_cli_exit_codes(
         assert code != 0
 
 
-@pytest.fixture(scope="module")
-def remote_server() -> Generator[tuple[str, int], None, None]:
-    """Fixture that provides a remote LanguageTool server for testing.
-
-    This fixture initializes a LanguageTool instance and yields its host and port,
-    ensuring proper cleanup after all tests in the module complete.
-
-    :return: A tuple containing the server host and port (host, port).
-    :rtype: Generator[Tuple[str, int], None, None]
-    """
-    with language_tool_python.LanguageTool("en-US") as tool:
-        host = tool._host
-        port = tool._port
-        yield host, port
-
-
 def test_cli_remote_ok(remote_server: tuple[str, int]) -> None:
     """Test the CLI with a remote server using valid input text.
 
@@ -155,78 +171,3 @@ def test_cli_remote_error(remote_server: tuple[str, int]) -> None:
         "This is noot okay.\n",
     )
     assert code != 0
-
-
-def test_parse_args_enabled_only_with_enable_categories() -> None:
-    """Test that --enabled-only is accepted when only --enable-categories is provided.
-
-    :raises AssertionError: If parse_args raises an error for this valid combination.
-    """
-    args = parse_args(["-l", "en-US", "--enabled-only", "-E", "TYPOS", "file.txt"])
-    assert args.enabled_only is True
-    assert args.enable_categories == {"TYPOS"}
-
-
-def test_parse_args_enabled_only_rejects_disable_categories() -> None:
-    """Test that --enabled-only cannot be combined with --disable-categories.
-
-    :raises SystemExit: Expected, as argparse calls sys.exit on error.
-    """
-    with pytest.raises(SystemExit):
-        parse_args(
-            ["-l", "en-US", "--enabled-only", "-e", "RULE", "-D", "TYPOS", "file.txt"]
-        )
-
-
-def test_parse_args_enabled_only_requires_enable_or_enable_categories() -> None:
-    """Test that --enabled-only requires at least --enable or --enable-categories.
-
-    :raises SystemExit: Expected, as argparse calls sys.exit on error.
-    """
-    with pytest.raises(SystemExit):
-        parse_args(["-l", "en-US", "--enabled-only", "file.txt"])
-
-
-def test_parse_args_categories() -> None:
-    """Test that --disable-categories and --enable-categories are parsed correctly.
-
-    :raises AssertionError: If the parsed category sets do not match the expected
-    values.
-    """
-    args = parse_args(
-        ["-l", "en-US", "-D", "TYPOS,GRAMMAR", "-E", "PUNCTUATION", "file.txt"]
-    )
-    assert args.disable_categories == {"TYPOS", "GRAMMAR"}
-    assert args.enable_categories == {"PUNCTUATION"}
-
-
-def test_parse_args_categories_multiple_flags() -> None:
-    """Test that repeated -D/-E flags accumulate into the same set.
-
-    :raises AssertionError: If the category sets do not accumulate correctly.
-    """
-    args = parse_args(
-        ["-l", "en-US", "-D", "TYPOS", "-D", "GRAMMAR", "-E", "PUNCTUATION", "file.txt"]
-    )
-    assert args.disable_categories == {"TYPOS", "GRAMMAR"}
-    assert args.enable_categories == {"PUNCTUATION"}
-
-
-def main_with_stdin(argv: list[str], stdin: str) -> int:
-    """Execute the main CLI with simulated stdin input.
-
-    This utility function temporarily replaces sys.stdin with a StringIO object
-    containing the provided input, executes the main CLI function, and then restores the
-    original stdin.
-
-    :param argv: Command-line arguments to pass to the main function.
-    :param stdin: Input text to simulate as stdin.
-    :return: Exit code returned by the main function.
-    :rtype: int
-    """
-    old_stdin = sys.stdin
-    sys.stdin = io.StringIO(stdin)
-    try:
-        return main(argv)
-    finally:
-        sys.stdin = old_stdin
diff --git a/tests/test_config.py b/tests/integration/test_config.py
similarity index 81%
rename from tests/test_config.py
rename to tests/integration/test_config.py
index 087d5d3..3a2ba42 100644
--- a/tests/test_config.py
+++ b/tests/integration/test_config.py
@@ -1,4 +1,4 @@
-"""Tests for the configuration options of LanguageTool."""
+"""Integration tests for LanguageTool configuration options (require a local server)."""
 
 import re
 import time
@@ -6,7 +6,6 @@
 import pytest
 
 import language_tool_python
-from language_tool_python.config_file import ConfigValue, LanguageToolConfig
 from language_tool_python.exceptions import LanguageToolError
 
 
@@ -175,35 +174,3 @@ def test_disabled_rule_in_config() -> None:
         text = "He realised that the organization was in jeopardy."
         matches = tool.check(text)
         assert len(matches) == 0
-
-
-@pytest.mark.parametrize(
-    "config",
-    [
-        {"blockedReferrers": "example.com\ntrustXForwardForHeader=true"},
-        {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\rrequestLimit=0"]},
-        {"lang-en\ntrustXForwardForHeader": "true"},
-        {"lang-en": "custom-word\nrequestLimit=0"},
-    ],
-)
-def test_config_rejects_line_break_injection(config: dict[str, ConfigValue]) -> None:
-    """Test that config serialization cannot be escaped with CR/LF characters."""
-    with pytest.raises(ValueError, match="cannot contain line breaks"):
-        LanguageToolConfig(config)
-
-
-@pytest.mark.parametrize(
-    "config",
-    [
-        {"blockedReferrers": "example.com\\"},
-        {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\\"]},
-        {"lang-en\\": "true"},
-        {"lang-en": "custom-word\\"},
-    ],
-)
-def test_config_rejects_odd_trailing_backslashes(
-    config: dict[str, ConfigValue],
-) -> None:
-    """Test that config serialization cannot escape the line ending with a backslash."""
-    with pytest.raises(ValueError, match="odd number of backslashes"):
-        LanguageToolConfig(config)
diff --git a/tests/integration/test_download.py b/tests/integration/test_download.py
new file mode 100644
index 0000000..fd74f50
--- /dev/null
+++ b/tests/integration/test_download.py
@@ -0,0 +1,94 @@
+"""Integration tests for LanguageTool download and version management (real network)."""
+
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+import language_tool_python
+from language_tool_python.exceptions import LanguageToolError, PathError
+
+
+def test_install_inexistent_version() -> None:
+    """Test errors when downloading a non-existent LanguageTool version.
+
+    This test verifies that the tool correctly handles invalid version numbers by
+    raising a LanguageToolError when trying to initialize with a version that does not
+    exist.
+
+    :raises AssertionError: If LanguageToolError is not raised for an invalid version.
+    """
+    with pytest.raises(LanguageToolError):
+        language_tool_python.LanguageTool(language_tool_download_version="0.0")
+
+
+def test_install_too_old_version() -> None:
+    """Test that attempting to download a too-old LanguageTool version raises an error.
+
+    This test verifies that the tool correctly handles versions that are no longer
+    supported by raising a PathError when trying to initialize with an outdated version.
+
+    :raises AssertionError: If PathError is not raised for a too-old version.
+    """
+    with pytest.raises(PathError):
+        language_tool_python.LanguageTool(language_tool_download_version="3.9")
+
+
+def test_inexistent_language() -> None:
+    """Test that creating a LanguageTag with an invalid language code raises an error.
+
+    This test verifies that the LanguageTag constructor correctly validates language
+    codes and raises a ValueError when given a language code that is not supported.
+
+    :raises AssertionError: If ValueError is not raised for an invalid language code.
+    """
+    with (
+        language_tool_python.LanguageTool("en-US") as tool,
+        pytest.raises(ValueError, match="unsupported language"),
+    ):
+        language_tool_python.LanguageTag("xx-XX", tool._get_languages())
+
+
+def test_install_oldest_supported_version() -> None:
+    """Test that downloading the oldest supported LanguageTool version works correctly.
+
+    This test verifies that the tool can successfully download and initialize with the
+    oldest version that is still supported.
+
+    :raises AssertionError: If the tool fails to initialize with the oldest supported
+        version.
+    """
+    try:
+        with language_tool_python.LanguageTool(
+            "en-US",
+            language_tool_download_version="4.0",
+        ) as tool:
+            assert tool.language_tool_download_version == "4.0"
+    except LanguageToolError:
+        pytest.fail("Failed to download or initialize the oldest supported version.")
+
+
+def test_install_snapshot_version() -> None:
+    """Test that downloading the snapshot version of LanguageTool works correctly.
+
+    This test verifies that the tool can successfully download and initialize with the
+    snapshot from three days ago (UTC).
+
+    :raises AssertionError: If the tool fails to initialize with the snapshot version.
+    """
+    # Compute the snapshot date exactly once: downloading and starting the server
+    # can take a while, so calling datetime.now() again for the assertion could
+    # yield a different date if the test runs across midnight UTC.
+    snapshot = (datetime.now(timezone.utc) - timedelta(days=3)).strftime("%Y%m%d")
+    try:
+        with language_tool_python.LanguageTool(
+            "en-US",
+            language_tool_download_version=snapshot,
+        ) as tool:
+            assert tool.language_tool_download_version == snapshot
+    except LanguageToolError:
+        pytest.skip(
+            (
+                "Failed to download or initialize the snapshot version. This may be "
+                "due to a missing snapshot for the expected date."
+            ),
+        )
diff --git a/tests/test_match.py b/tests/integration/test_match.py
similarity index 95%
rename from tests/test_match.py
rename to tests/integration/test_match.py
index a81713d..074f5f8 100644
--- a/tests/test_match.py
+++ b/tests/integration/test_match.py
@@ -1,4 +1,4 @@
-"""Tests for the Match functionality of LanguageTool."""
+"""Integration tests for the Match functionality of LanguageTool."""
 
 from typing import TypedDict
 
@@ -122,15 +122,15 @@ def test_match() -> None:
         expected format.
     """
     with language_tool_python.LanguageTool("en-US") as tool:
-        text = "A sentence with a error in the Hitchhiker\u2019s Guide tot he Galaxy"
+        text = "A sentence with a error in the Hitchhiker’s Guide tot he Galaxy"
         matches = tool.check(text)
         assert len(matches) == EXPECTED_MATCH_COUNT
         assert str(matches[0]) == (
             "Offset 16, length 1, Rule ID: EN_A_VS_AN\n"
-            "Message: Use “an” instead of \u2018a\u2019 if the following word starts "
-            "with a vowel sound, e.g. \u2018an article\u2019, \u2018an hour\u2019.\n"
+            "Message: Use “an” instead of ‘a’ if the following word starts "
+            "with a vowel sound, e.g. ‘an article’, ‘an hour’.\n"
             "Suggestion: an\n"
-            "A sentence with a error in the Hitchhiker\u2019s Guide tot he ..."
+            "A sentence with a error in the Hitchhiker’s Guide tot he ..."
             "\n                ^"
         )
 
diff --git a/tests/test_server_local.py b/tests/integration/test_server_local.py
similarity index 98%
rename from tests/test_server_local.py
rename to tests/integration/test_server_local.py
index 3731952..31b4b47 100644
--- a/tests/test_server_local.py
+++ b/tests/integration/test_server_local.py
@@ -1,4 +1,4 @@
-"""Tests for the local server functionality of LanguageTool."""
+"""Integration tests for the local server functionality of LanguageTool."""
 
 from __future__ import annotations
 
diff --git a/tests/property/__init__.py b/tests/property/__init__.py
new file mode 100644
index 0000000..2e5bdfe
--- /dev/null
+++ b/tests/property/__init__.py
@@ -0,0 +1 @@
+"""Property-based tests for the language_tool_python library."""
diff --git a/tests/property/conftest.py b/tests/property/conftest.py
new file mode 100644
index 0000000..6297f26
--- /dev/null
+++ b/tests/property/conftest.py
@@ -0,0 +1,17 @@
+"""Configuration for the property-based test suite."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+
+def pytest_collection_modifyitems(
+    items: list[pytest.Item],
+) -> None:
+    """Apply the 'property' marker to all tests collected from this directory."""
+    property_dir = Path(__file__).parent
+    for item in items:
+        if item.path.is_relative_to(property_dir):
+            item.add_marker(pytest.mark.property)
diff --git a/tests/property/test_prop_config.py b/tests/property/test_prop_config.py
new file mode 100644
index 0000000..41b95a9
--- /dev/null
+++ b/tests/property/test_prop_config.py
@@ -0,0 +1,85 @@
+"""Property-based tests for LanguageToolConfig input validation.
+
+These tests use Hypothesis to verify that injection-protection invariants
+hold for any input, not just the handwritten examples in unit tests.
+"""
+
+import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+from language_tool_python.config_file import LanguageToolConfig
+
+_LINEBREAK_CHARS = ["\n", "\r", "\r\n"]
+
+
+@given(
+    before=st.text(),
+    linebreak=st.sampled_from(_LINEBREAK_CHARS),
+    after=st.text(),
+)
+@settings(max_examples=200)
+def test_prop_config_value_with_linebreak_always_raises(
+    before: str,
+    linebreak: str,
+    after: str,
+) -> None:
+    """Any config value containing CR or LF must raise ValueError.
+
+    The string is constructed as ``before + linebreak + after`` to guarantee
+    the presence of a line-break character without relying on filter().
+
+    :param before: Arbitrary text before the line-break.
+    :param linebreak: A CR, LF, or CRLF sequence.
+    :param after: Arbitrary text after the line-break.
+    :raises AssertionError: If ValueError is not raised.
+    """
+    value = before + linebreak + after
+    with pytest.raises(ValueError, match="line breaks"):
+        LanguageToolConfig({"blockedReferrers": value})
+
+
+@given(
+    prefix=st.text(alphabet=st.characters(blacklist_characters="\r\n\\")),
+    count=st.integers(min_value=1, max_value=5),
+)
+@settings(max_examples=200)
+def test_prop_config_odd_trailing_backslashes_always_raise(
+    prefix: str,
+    count: int,
+) -> None:
+    r"""Any config value ending with an odd number of backslashes must raise ValueError.
+
+    The value is constructed as ``prefix + '\\' * (2*count - 1)`` to guarantee
+    the trailing backslash count is always odd (1, 3, 5, 7, or 9).
+
+    :param prefix: A string with no backslashes or line-break characters.
+    :param count: Determines the odd backslash count: ``2*count - 1``.
+    :raises AssertionError: If ValueError is not raised.
+    """
+    value = prefix + "\\" * (2 * count - 1)
+    with pytest.raises(ValueError, match="backslash"):
+        LanguageToolConfig({"blockedReferrers": value})
+
+
+@given(
+    key_before=st.text(alphabet=st.characters(blacklist_characters="\r\n")),
+    linebreak=st.sampled_from(_LINEBREAK_CHARS),
+    key_after=st.text(alphabet=st.characters(blacklist_characters="\r\n")),
+)
+@settings(max_examples=200)
+def test_prop_config_key_with_linebreak_always_raises(
+    key_before: str,
+    linebreak: str,
+    key_after: str,
+) -> None:
+    """Any config key containing CR or LF must raise ValueError.
+
+    :param key_before: Text before the line-break in the key.
+    :param linebreak: A CR, LF, or CRLF sequence.
+    :param key_after: Text after the line-break in the key.
+    :raises AssertionError: If ValueError is not raised.
+    """
+    key = key_before + linebreak + key_after
+    with pytest.raises(ValueError, match="line breaks"):
+        LanguageToolConfig({key: "valid_value"})
diff --git a/tests/property/test_prop_safe_zip.py b/tests/property/test_prop_safe_zip.py
new file mode 100644
index 0000000..ae3f992
--- /dev/null
+++ b/tests/property/test_prop_safe_zip.py
@@ -0,0 +1,74 @@
+"""Property-based tests for the safe ZIP extractor path-traversal protection."""
+
+import contextlib
+import io
+import shutil
+import uuid
+import zipfile
+from collections.abc import Iterator
+from contextlib import contextmanager
+from pathlib import Path
+
+import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+from language_tool_python._internals.safe_zip import SafeZipExtractor
+from language_tool_python.exceptions import PathError
+
+_TRAVERSAL_PREFIXES = ["../", "..\\", "/", "C:/", "..\\..\\"]
+
+
+def _make_zip_payload(files: dict[str, bytes]) -> bytes:
+    """Create an in-memory ZIP payload for testing."""
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as zf:
+        for name, data in files.items():
+            zf.writestr(name, data)
+    return buf.getvalue()
+
+
+@contextmanager
+def _temp_dir() -> Iterator[Path]:
+    """Create a temporary dir inside the project workspace to avoid perm issues."""
+    root = Path.cwd() / ".test_prop_safe_zip_tmp"
+    path = root / uuid.uuid4().hex
+    path.mkdir(parents=True)
+    try:
+        yield path
+    finally:
+        shutil.rmtree(path, ignore_errors=True)
+        with contextlib.suppress(OSError):
+            root.rmdir()
+
+
+@given(
+    traversal=st.sampled_from(_TRAVERSAL_PREFIXES),
+    suffix=st.text(
+        alphabet=st.characters(whitelist_categories=("Ll", "Lu", "Nd")),
+        min_size=1,
+    ),
+)
+@settings(max_examples=300)
+def test_prop_safe_zip_path_traversal_always_rejected(
+    traversal: str,
+    suffix: str,
+) -> None:
+    """Any ZIP member whose name begins with a path-traversal prefix must be rejected.
+
+    Checks that ``SafeZipExtractor`` raises ``PathError`` for filenames like
+    ``../evil``, ``/etc/passwd``, or ``C:/Windows/file`` regardless of the suffix.
+
+    :param traversal: A path-traversal prefix (e.g. ``../``, ``/``).
+    :param suffix: Alphanumeric suffix appended after the traversal prefix.
+    :raises AssertionError: If ``PathError`` is not raised for the unsafe member name.
+    """
+    filename = traversal + suffix
+    payload = _make_zip_payload({filename: b"payload"})
+
+    with (
+        _temp_dir() as dest,
+        zipfile.ZipFile(io.BytesIO(payload)) as zf,
+        pytest.raises(PathError, match="Unsafe ZIP member"),
+    ):
+        SafeZipExtractor().extractall(zf, dest)
diff --git a/tests/property/test_prop_utils.py b/tests/property/test_prop_utils.py
new file mode 100644
index 0000000..4bc7f20
--- /dev/null
+++ b/tests/property/test_prop_utils.py
@@ -0,0 +1,20 @@
+"""Property-based tests for the LanguageTool utility functions."""
+
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+import language_tool_python
+
+
+@given(text=st.text())
+@settings(max_examples=500)
+def test_prop_correct_with_empty_matches_is_identity(text: str) -> None:
+    """Applying an empty list of matches must leave any text untouched.
+
+    Hypothesis feeds arbitrary strings (empty, unicode, emojis...) to check it.
+
+    :param text: Arbitrary string generated by Hypothesis.
+    :raises AssertionError: If the corrected text differs from the input.
+    """
+    corrected = language_tool_python.utils.correct(text, [])
+    assert corrected == text
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..1733800
--- /dev/null
+++ b/tests/unit/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for the language_tool_python library."""
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
new file mode 100644
index 0000000..65fcecd
--- /dev/null
+++ b/tests/unit/conftest.py
@@ -0,0 +1,17 @@
+"""Configuration for the unit test suite."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+
+def pytest_collection_modifyitems(
+    items: list[pytest.Item],
+) -> None:
+    """Mark every collected test living under this directory with ``unit``."""
+    here = Path(__file__).parent
+    own_items = (item for item in items if item.path.is_relative_to(here))
+    for test_item in own_items:
+        test_item.add_marker(pytest.mark.unit)
diff --git a/tests/unit/test_cli_args.py b/tests/unit/test_cli_args.py
new file mode 100644
index 0000000..6f79066
--- /dev/null
+++ b/tests/unit/test_cli_args.py
@@ -0,0 +1,60 @@
+"""Unit tests for the CLI argument parser."""
+
+import pytest
+
+from language_tool_python.__main__ import parse_args
+
+
+def test_parse_args_enabled_only_with_enable_categories() -> None:
+    """Check that --enabled-only parses when paired only with --enable-categories.
+
+    :raises AssertionError: If the parsed flags differ from the expected values.
+    """
+    parsed = parse_args("-l en-US --enabled-only -E TYPOS file.txt".split())
+    assert parsed.enabled_only is True
+    assert parsed.enable_categories == {"TYPOS"}
+
+
+def test_parse_args_enabled_only_rejects_disable_categories() -> None:
+    """Check that pairing --enabled-only with --disable-categories is refused.
+
+    The parser is expected to terminate with ``SystemExit``, which is trapped
+    by ``pytest.raises``; the test fails if parsing succeeds instead.
+    """
+    argv = "-l en-US --enabled-only -e RULE -D TYPOS file.txt".split()
+    with pytest.raises(SystemExit):
+        parse_args(argv)
+
+
+def test_parse_args_enabled_only_requires_enable_or_enable_categories() -> None:
+    """Check that --enabled-only without any enabled rule or category is refused.
+
+    ``SystemExit`` from the parser is the expected outcome and is trapped here.
+    """
+    with pytest.raises(SystemExit):
+        parse_args("-l en-US --enabled-only file.txt".split())
+
+
+def test_parse_args_categories() -> None:
+    """Test that --disable-categories and --enable-categories are parsed correctly.
+
+    :raises AssertionError: If the parsed category sets do not match the expected
+        values.
+    """
+    args = parse_args(
+        ["-l", "en-US", "-D", "TYPOS,GRAMMAR", "-E", "PUNCTUATION", "file.txt"]
+    )
+    assert args.disable_categories == {"TYPOS", "GRAMMAR"}
+    assert args.enable_categories == {"PUNCTUATION"}
+
+
+def test_parse_args_categories_multiple_flags() -> None:
+    """Check that repeating -D/-E merges every given category into one set.
+
+    :raises AssertionError: If a repeated flag replaces the earlier value instead
+        of accumulating into the same set.
+    """
+    argv = "-l en-US -D TYPOS -D GRAMMAR -E PUNCTUATION file.txt".split()
+    parsed = parse_args(argv)
+    assert parsed.disable_categories == {"TYPOS", "GRAMMAR"}
+    assert parsed.enable_categories == {"PUNCTUATION"}
diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py
new file mode 100644
index 0000000..aa46834
--- /dev/null
+++ b/tests/unit/test_config_validation.py
@@ -0,0 +1,37 @@
+"""Unit tests for LanguageToolConfig input validation and injection protection."""
+
+import pytest
+
+from language_tool_python.config_file import ConfigValue, LanguageToolConfig
+
+
+@pytest.mark.parametrize(
+    "config",
+    [
+        {"blockedReferrers": "example.com\ntrustXForwardForHeader=true"},
+        {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\rrequestLimit=0"]},
+        {"lang-en\ntrustXForwardForHeader": "true"},
+        {"lang-en": "custom-word\nrequestLimit=0"},
+    ],
+)
+def test_config_rejects_line_break_injection(config: dict[str, ConfigValue]) -> None:
+    """Test that a CR/LF in a config key, value or list item raises ValueError."""
+    with pytest.raises(ValueError, match="cannot contain line breaks"):
+        LanguageToolConfig(config)
+
+
+@pytest.mark.parametrize(
+    "config",
+    [
+        {"blockedReferrers": "example.com\\"},
+        {"disabledRuleIds": ["MORFOLOGIK_RULE_EN_US", "SAFE\\"]},
+        {"lang-en\\": "true"},
+        {"lang-en": "custom-word\\"},
+    ],
+)
+def test_config_rejects_odd_trailing_backslashes(
+    config: dict[str, ConfigValue],
+) -> None:
+    """Test that a key, value or list item ending in one backslash raises ValueError."""
+    with pytest.raises(ValueError, match="odd number of backslashes"):
+        LanguageToolConfig(config)
diff --git a/tests/test_download.py b/tests/unit/test_download.py
similarity index 82%
rename from tests/test_download.py
rename to tests/unit/test_download.py
index d6de0f0..ad8ec2d 100644
--- a/tests/test_download.py
+++ b/tests/unit/test_download.py
@@ -1,4 +1,7 @@
-"""Tests for the download/language functionality of LanguageTool."""
+"""Unit tests for download logic, URL construction, HTTP handling, and integrity checks.
+
+These tests use mocks and monkeypatching to avoid real network requests.
+"""
 
 import contextlib
 import hashlib
@@ -10,7 +13,7 @@
 import zipfile
 from collections.abc import Iterator
 from contextlib import contextmanager
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone
 from pathlib import Path
 from unittest.mock import patch
 
@@ -23,7 +26,7 @@
     _LTP_MAX_DOWNLOAD_BYTES_ENV_VAR,
     LocalLanguageTool,
 )
-from language_tool_python.exceptions import LanguageToolError, PathError
+from language_tool_python.exceptions import PathError
 
 EXPECTED_DOWNLOAD_BYTES_OVERRIDE = 123
 
@@ -87,52 +90,9 @@ def workspace_temp_dir() -> Iterator[Path]:
             root.rmdir()
 
 
-def test_install_inexistent_version() -> None:
-    """Test errors when downloading a non-existent LanguageTool version.
-
-    This test verifies that the tool correctly handles invalid version numbers by
-    raising a LanguageToolError when trying to initialize with a version that does not
-    exist.
-
-    :raises AssertionError: If LanguageToolError is not raised for an invalid version.
-    """
-    with pytest.raises(LanguageToolError):
-        language_tool_python.LanguageTool(language_tool_download_version="0.0")
-
-
-def test_install_too_old_version() -> None:
-    """Test that attempting to download a too-old LanguageTool version raises an error.
-
-    This test verifies that the tool correctly handles versions that are no longer
-    supported by raising a PathError when trying to initialize with an outdated version.
-
-    :raises AssertionError: If PathError is not raised for a too-old version.
-    """
-    with pytest.raises(PathError):
-        language_tool_python.LanguageTool(language_tool_download_version="3.9")
-
-
-def test_inexistent_language() -> None:
-    """Test that creating a LanguageTag with an invalid language code raises an error.
-
-    This test verifies that the LanguageTag constructor correctly validates language
-    codes and raises a ValueError when given a language code that is not supported.
-
-    :raises AssertionError: If ValueError is not raised for an invalid language code.
-    """
-    with (
-        language_tool_python.LanguageTool("en-US") as tool,
-        pytest.raises(ValueError, match="unsupported language"),
-    ):
-        language_tool_python.LanguageTag("xx-XX", tool._get_languages())
-
-
 def test_http_get_403_forbidden() -> None:
     """Test that http_get raises PathError when receiving a 403 Forbidden status code.
 
-    This test verifies that the function correctly handles forbidden access errors when
-    attempting to download files.
-
     :raises AssertionError: If PathError is not raised for a 403 status code.
     """
     mock_response = MockDownloadResponse(b"", status_code=403)
@@ -153,9 +113,6 @@ def test_http_get_403_forbidden() -> None:
 def test_http_get_other_error_codes() -> None:
     """Test PathError handling for unexpected HTTP status codes.
 
-    This test verifies that the function correctly handles different HTTP error codes
-    like 500 (Internal Server Error), 503 (Service Unavailable), etc.
-
     :raises AssertionError: If PathError is not raised for error status codes.
     """
     error_codes = [500, 502, 503, 504]
@@ -562,49 +519,3 @@ def test_latest_snapshot_download_renames_archive_root_to_current_date(
             local_language_tool.download()
 
         get_mock.assert_not_called()
-
-
-def test_install_oldest_supported_version() -> None:
-    """Test that downloading the oldest supported LanguageTool version works correctly.
-
-    This test verifies that the tool can successfully download and initialize with the
-    oldest version that is still supported.
-
-    :raises AssertionError: If the tool fails to initialize with the oldest supported
-        version.
-    """
-    try:
-        with language_tool_python.LanguageTool(
-            "en-US",
-            language_tool_download_version="4.0",
-        ) as tool:
-            assert tool.language_tool_download_version == "4.0"
-    except LanguageToolError:
-        pytest.fail("Failed to download or initialize the oldest supported version.")
-
-
-def test_install_snapshot_version() -> None:
-    """Test that downloading the snapshot version of LanguageTool works correctly.
-
-    This test verifies that the tool can successfully download and initialize with the
-    snapshot of yesterday.
-
-    :raises AssertionError: If the tool fails to initialize with the snapshot version.
-    """
-    try:
-        with language_tool_python.LanguageTool(
-            "en-US",
-            language_tool_download_version=(
-                (datetime.now(timezone.utc) - timedelta(days=3)).strftime("%Y%m%d")
-            ),
-        ) as tool:
-            assert tool.language_tool_download_version == (
-                datetime.now(timezone.utc) - timedelta(days=3)
-            ).strftime("%Y%m%d")
-    except LanguageToolError:
-        pytest.skip(
-            (
-                "Failed to download or initialize the snapshot version. This may be "
-                "due to a missing snapshot for the expected date."
-            ),
-        )
diff --git a/tests/test_safe_zip.py b/tests/unit/test_safe_zip.py
similarity index 99%
rename from tests/test_safe_zip.py
rename to tests/unit/test_safe_zip.py
index 9741845..328c7d4 100644
--- a/tests/test_safe_zip.py
+++ b/tests/unit/test_safe_zip.py
@@ -1,4 +1,4 @@
-"""Tests for safe ZIP extraction."""
+"""Unit tests for safe ZIP extraction."""
 
 import contextlib
 import hashlib
diff --git a/uv.lock b/uv.lock
index dcdd4ab..07ecf5e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -392,6 +392,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f4/b2/50e9b292b5cac13e9e81272c7171301abc753a60460d21505b606e15cf21/furo-2025.12.19-py3-none-any.whl", hash = "sha256:bb0ead5309f9500130665a26bee87693c41ce4dbdff864dbfb6b0dae4673d24f", size = 339262, upload-time = "2025-12-19T17:34:38.905Z" },
 ]
 
+[[package]]
+name = "hypothesis"
+version = "6.155.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+    { name = "sortedcontainers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/10/aa/9a91a4addf285702a98713da44b3581799539426436617bfb8914478c166/hypothesis-6.155.6.tar.gz", hash = "sha256:7569e1897690336c85d49d8391b49ec6ab83d951009515bfc29faebbac286cf5", size = 478038, upload-time = "2026-06-19T13:21:23.379Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9e/a9/4c17e962c2e9cbc314bb579ed2e2b2da45d7b6b942aab6948d14d85abfea/hypothesis-6.155.6-py3-none-any.whl", hash = "sha256:a96d9a29f6bbc8ccac39dd84e140892da76765464929f401a4181b90c20c9ad1", size = 544521, upload-time = "2026-06-19T13:21:20.934Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.18"
@@ -457,7 +470,9 @@ quality = [
     { name = "ruff" },
 ]
 tests = [
+    { name = "hypothesis" },
     { name = "pytest" },
+    { name = "pytest-benchmark" },
     { name = "pytest-cov" },
 ]
 types = [
@@ -486,7 +501,9 @@ quality = [
     { name = "ruff", specifier = "==0.15.16" },
 ]
 tests = [
+    { name = "hypothesis" },
     { name = "pytest" },
+    { name = "pytest-benchmark" },
     { name = "pytest-cov" },
 ]
 types = [
@@ -788,6 +805,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
 ]
 
+[[package]]
+name = "py-cpuinfo"
+version = "9.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
+]
+
 [[package]]
 name = "pygments"
 version = "2.20.0"
@@ -815,6 +841,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/24/25/1de2678b631f5a49215c6c96fff41ba892b0a34df68d6d80292b1b48aa7f/pytest-9.1.1-py3-none-any.whl", hash = "sha256:37a86b45efb9a47a61a36449063e8e18d0cab3161329fc099eb21783169c4f0c", size = 386536, upload-time = "2026-06-19T10:58:31.347Z" },
 ]
 
+[[package]]
+name = "pytest-benchmark"
+version = "5.2.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "py-cpuinfo" },
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" },
+]
+
 [[package]]
 name = "pytest-cov"
 version = "7.1.0"
@@ -887,6 +926,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4c/07/2ebca9b11fb9be7340a818d8d6f63feaebb146be2c4afbd6061701d6df6e/snowballstemmer-3.1.1-py3-none-any.whl", hash = "sha256:7e207fa178741da09cdee59d3ecec3827ad5f92b1fc5c9ff3755b639f71f5752", size = 104164, upload-time = "2026-06-03T00:56:38.614Z" },
 ]
 
+[[package]]
+name = "sortedcontainers"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" },
+]
+
 [[package]]
 name = "soupsieve"
 version = "2.8.4"

From 42c9c4e7c43ea9fb5d9d2a1ec06bc02831a88fd1 Mon Sep 17 00:00:00 2001
From: mdevolde <martin.devolder2@gmail.com>
Date: Sat, 27 Jun 2026 19:19:17 +0200
Subject: [PATCH 2/2] test: add unit tests to increase coverage

---
 .github/workflows/test.yml                    |   2 +-
 .gitignore                                    |   3 +
 pyproject.toml                                |  14 +-
 pytest.ini                                    |   2 +-
 src/language_tool_python/__main__.py          |   4 +-
 src/language_tool_python/_internals/compat.py |   4 +-
 src/language_tool_python/config_file.py       |   2 +-
 src/language_tool_python/download_lt.py       |   2 +-
 tests/unit/test_api_types.py                  |  64 +++
 tests/unit/test_cli_unit.py                   | 195 ++++++++
 tests/unit/test_config_unit.py                | 235 ++++++++++
 tests/unit/test_download_unit.py              | 305 +++++++++++++
 tests/unit/test_internals_utils.py            | 224 ++++++++++
 tests/unit/test_language_tag.py               | 168 +++++++
 tests/unit/test_match.py                      | 421 ++++++++++++++++++
 tests/unit/test_utils.py                      | 218 +++++++++
 16 files changed, 1843 insertions(+), 20 deletions(-)
 create mode 100644 tests/unit/test_api_types.py
 create mode 100644 tests/unit/test_cli_unit.py
 create mode 100644 tests/unit/test_config_unit.py
 create mode 100644 tests/unit/test_download_unit.py
 create mode 100644 tests/unit/test_internals_utils.py
 create mode 100644 tests/unit/test_language_tag.py
 create mode 100644 tests/unit/test_match.py
 create mode 100644 tests/unit/test_utils.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a0cd765..3a5ca41 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,7 +35,7 @@ jobs:
           - os: ubuntu-26.04
             python-version: "3.14"
           - os: ubuntu-26.04
-            python-version: "3.15.0-beta.2"
+            python-version: "3.15.0-beta.3"
           - os: macos-26
             python-version: "3.14"
           - os: windows-2025
diff --git a/.gitignore b/.gitignore
index e15106e..3f03577 100644
--- a/.gitignore
+++ b/.gitignore
@@ -204,6 +204,9 @@ cython_debug/
 # Ruff stuff:
 .ruff_cache/
 
+# Pytest tmp_path base directory (project-relative to avoid Windows temp permission issues)
+.pytest_tmp/
+
 # PyPI configuration file
 .pypirc
 
diff --git a/pyproject.toml b/pyproject.toml
index 182c0cd..e540e9f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -174,18 +174,8 @@ warn_unreachable = true
 warn_unused_configs = true
 warn_unused_ignores = true
 
-[[tool.mypy.overrides]]
-module = ["tests.benchmarks.*"]
-# pytest-benchmark is untyped; relax Any restrictions for benchmark files only
-disallow_any_unimported = false
-disallow_any_expr = false
-disallow_any_explicit = false
-disallow_any_decorated = false
-
 [[tool.mypy.overrides]]
 module = ["tests.property.*"]
-# hypothesis is untyped; relax Any restrictions for property test files only
-disallow_any_unimported = false
-disallow_any_expr = false
-disallow_any_explicit = false
+# hypothesis decorators contain Any expressions, so we need to disable the following checks for tests using hypothesis
 disallow_any_decorated = false
+disallow_any_expr = false
diff --git a/pytest.ini b/pytest.ini
index 2da9216..cbd524a 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,5 +1,5 @@
 [pytest]
-addopts = -vra --cov=src --cov-report=html --cov-report=xml
+addopts = -vra --cov=src --cov-report=html --cov-report=xml --basetemp=.pytest_tmp
 testpaths = tests
 markers =
     unit: fast, isolated tests with no external dependencies
diff --git a/src/language_tool_python/__main__.py b/src/language_tool_python/__main__.py
index 8ee561a..8300df2 100644
--- a/src/language_tool_python/__main__.py
+++ b/src/language_tool_python/__main__.py
@@ -58,7 +58,7 @@ def _read_project_version(pyproject: Path) -> str:
     __version__ = version("language_tool_python")
     # If the package is not installed in the environment,
     # read the version from pyproject.toml
-except PackageNotFoundError:
+except PackageNotFoundError:  # pragma: no cover
     project_root = Path(__file__).resolve().parent.parent
     pyproject = project_root / "pyproject.toml"
     __version__ = _read_project_version(pyproject)
@@ -258,7 +258,7 @@ def __call__(
             cli_args.disable_categories.update(rule_values)
         elif self.dest == "enable_categories":
             cli_args.enable_categories.update(rule_values)
-        else:
+        else:  # pragma: no cover
             err = f"unexpected rules destination: {self.dest}"
             raise ValueError(err)
 
diff --git a/src/language_tool_python/_internals/compat.py b/src/language_tool_python/_internals/compat.py
index be623b1..9dc0631 100644
--- a/src/language_tool_python/_internals/compat.py
+++ b/src/language_tool_python/_internals/compat.py
@@ -13,11 +13,11 @@
 if sys.version_info >= (3, 11):
     from tomllib import loads as toml_loads
 else:
-    from tomli import loads as toml_loads
+    from tomli import loads as toml_loads  # pragma: no cover
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
 else:
-    from typing_extensions import deprecated
+    from typing_extensions import deprecated  # pragma: no cover
 
 __all__ = ["deprecated", "toml_loads"]
diff --git a/src/language_tool_python/config_file.py b/src/language_tool_python/config_file.py
index 8b73dbe..c7e703a 100644
--- a/src/language_tool_python/config_file.py
+++ b/src/language_tool_python/config_file.py
@@ -158,7 +158,7 @@ def _path_validator(v: PathLike[str] | str) -> None:
     if not p.exists():
         err = f"path does not exist: {p}"
         raise PathError(err)
-    if not p.is_file() and not p.is_dir():
+    if not p.is_file() and not p.is_dir():  # pragma: no cover
         err = f"path is not a file/directory: {p}"
         raise PathError(err)
 
diff --git a/src/language_tool_python/download_lt.py b/src/language_tool_python/download_lt.py
index 65defa0..1ec3330 100644
--- a/src/language_tool_python/download_lt.py
+++ b/src/language_tool_python/download_lt.py
@@ -385,7 +385,7 @@ def download(self) -> None:
 
         :raises NotImplementedError: Always, unless implemented by a subclass.
         """
-        raise NotImplementedError
+        raise NotImplementedError  # pragma: no cover
 
     def _get_remote_zip(
         self,
diff --git a/tests/unit/test_api_types.py b/tests/unit/test_api_types.py
new file mode 100644
index 0000000..af2b7ce
--- /dev/null
+++ b/tests/unit/test_api_types.py
@@ -0,0 +1,64 @@
+"""Unit tests for _internals/api_types.py TypeGuard helpers."""
+
+from language_tool_python._internals.api_types import (
+    is_check_response,
+    is_language_info,
+)
+
+
+def test_is_language_info_valid() -> None:
+    """Accepts a dict whose ``code``, ``longCode`` and ``name`` are all strings."""
+    assert is_language_info({"code": "en", "longCode": "en-US", "name": "English"})
+
+
+def test_is_language_info_not_dict() -> None:
+    """Rejects a str, an int, ``None`` and a list."""
+    assert not is_language_info("not a dict")
+    assert not is_language_info(42)
+    assert not is_language_info(None)
+    assert not is_language_info(["code", "longCode", "name"])
+
+
+def test_is_language_info_missing_field() -> None:
+    """Rejects dicts lacking ``name``, lacking ``longCode``, or empty."""
+    assert not is_language_info({"code": "en", "longCode": "en-US"})
+    assert not is_language_info({"code": "en", "name": "English"})
+    assert not is_language_info({})
+
+
+def test_is_language_info_wrong_type() -> None:
+    """Rejects dicts where ``code`` is an int or ``longCode`` is ``None``."""
+    assert not is_language_info({"code": 1, "longCode": "en-US", "name": "English"})
+    assert not is_language_info({"code": "en", "longCode": None, "name": "English"})
+
+
+def test_is_check_response_valid() -> None:
+    """Accepts a dict with ``matches``, ``language`` and ``warnings`` entries."""
+    assert is_check_response(
+        {
+            "matches": [],
+            "language": {"code": "en"},
+            "warnings": {"incompleteResults": False},
+        }
+    )
+
+
+def test_is_check_response_not_dict() -> None:
+    """Rejects a str, ``None`` and an int."""
+    assert not is_check_response("not a dict")
+    assert not is_check_response(None)
+    assert not is_check_response(123)
+
+
+def test_is_check_response_missing_field() -> None:
+    """Rejects dicts lacking ``warnings``, lacking ``language``, or empty."""
+    assert not is_check_response({"matches": [], "language": {}})
+    assert not is_check_response({"matches": [], "warnings": {}})
+    assert not is_check_response({})
+
+
+def test_is_check_response_wrong_type() -> None:
+    """Rejects a str given for ``matches``, ``language`` or ``warnings``."""
+    assert not is_check_response({"matches": "[]", "language": {}, "warnings": {}})
+    assert not is_check_response({"matches": [], "language": "en", "warnings": {}})
+    assert not is_check_response({"matches": [], "language": {}, "warnings": "none"})
diff --git a/tests/unit/test_cli_unit.py b/tests/unit/test_cli_unit.py
new file mode 100644
index 0000000..33af934
--- /dev/null
+++ b/tests/unit/test_cli_unit.py
@@ -0,0 +1,195 @@
+"""Unit tests for the CLI helper functions in __main__.py."""
+
+from __future__ import annotations
+
+import io
+from pathlib import Path
+
+import pytest
+
+from language_tool_python.__main__ import (
+    CliArgs,
+    _read_project_version,
+    get_input_text,
+    get_remote_server,
+    get_rules,
+    get_text,
+    parse_args,
+    print_exception,
+)
+
+
+class TestGetRules:
+    """Tests for the get_rules() rule-string parser."""
+
+    def test_comma_separated(self) -> None:
+        """A comma-separated string yields one set entry per rule ID."""
+        assert get_rules("RULE_A,RULE_B") == {"RULE_A", "RULE_B"}
+
+    def test_uppercases(self) -> None:
+        """Lowercase input comes back uppercased."""
+        assert get_rules("rule_a") == {"RULE_A"}
+
+    def test_hyphen_allowed(self) -> None:
+        """A hyphen inside a rule ID is kept as part of that ID."""
+        assert get_rules("MORFOLOGIK-RULE") == {"MORFOLOGIK-RULE"}
+
+    def test_whitespace_separated(self) -> None:
+        """A space between rule IDs separates them into distinct entries."""
+        assert get_rules("RULE_A RULE_B") == {"RULE_A", "RULE_B"}
+
+    def test_empty_string(self) -> None:
+        """An empty string yields an empty set."""
+        assert get_rules("") == set()
+
+
+class TestParseArgsEnabledOnly:
+    """Tests for the --enabled-only CLI argument validation."""
+
+    def test_enabled_only_with_disable_raises(self) -> None:
+        """--enabled-only with both ``-e RULE`` and ``-d OTHER`` causes SystemExit."""
+        with pytest.raises(SystemExit):
+            parse_args(
+                [
+                    "-l",
+                    "en-US",
+                    "--enabled-only",
+                    "-e",
+                    "RULE",
+                    "-d",
+                    "OTHER",
+                    "file.txt",
+                ]
+            )
+
+    def test_enabled_only_with_enable_passes(self) -> None:
+        """--enabled-only with ``-e RULE`` parses and records the enabled rule."""
+        args = parse_args(["-l", "en-US", "--enabled-only", "-e", "RULE", "file.txt"])
+        assert args.enabled_only is True
+        assert "RULE" in args.enable
+
+
+class TestGetRemoteServer:
+    """Tests for the get_remote_server() URL builder."""
+
+    def _args(self, host: str | None = None, port: str | None = None) -> CliArgs:
+        """Return a fresh CliArgs with remote_host/remote_port overwritten."""
+        args = CliArgs()
+        args.remote_host = host
+        args.remote_port = port
+        return args
+
+    def test_no_host_returns_none(self) -> None:
+        """Returns None when both remote_host and remote_port are None."""
+        assert get_remote_server(self._args()) is None
+
+    def test_host_without_port(self) -> None:
+        """Returns the bare host name when remote_port is None."""
+        assert get_remote_server(self._args(host="localhost")) == "localhost"
+
+    def test_host_with_port(self) -> None:
+        """Returns ``host:port`` when both host and port are provided."""
+        result = get_remote_server(self._args(host="localhost", port="8081"))
+        assert result == "localhost:8081"
+
+
+class TestPrintException:
+    """Tests for the print_exception() stderr printer."""
+
+    def test_without_debug_prints_to_stderr(
+        self, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """With debug=False, the exception message appears on stderr."""
+        print_exception(ValueError("test error"), debug=False)
+        assert "test error" in capsys.readouterr().err
+
+    def test_with_debug_prints_traceback(
+        self, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """With debug=True inside an ``except`` block, stderr names the exception type."""
+        try:
+            msg = "original error"
+            raise ValueError(msg)
+        except ValueError:
+            print_exception(ValueError("current error"), debug=True)
+        captured = capsys.readouterr()
+        assert "ValueError" in captured.err
+
+
+class TestGetText:
+    """Tests for the get_text() file reader."""
+
+    def test_reads_file(self, tmp_path: Path) -> None:
+        """File content is returned as-is when no ignore pattern is given."""
+        f = tmp_path / "test.txt"
+        f.write_text("hello world\n", encoding="utf-8")
+        result = get_text(str(f), encoding="utf-8", ignore=None)
+        assert result == "hello world\n"
+
+    def test_ignore_replaces_matching_lines(self, tmp_path: Path) -> None:
+        """Lines matching the ignore regex are dropped; the other lines remain."""
+        f = tmp_path / "test.txt"
+        f.write_text("keep this\n# skip this\nkeep too\n", encoding="utf-8")
+        result = get_text(str(f), encoding="utf-8", ignore=r"#.*")
+        assert "# skip this" not in result
+        assert "keep this" in result
+        assert "keep too" in result
+
+    def test_no_ignore_keeps_all(self, tmp_path: Path) -> None:
+        """Content is returned unchanged with ``encoding=None`` and no ignore pattern."""
+        f = tmp_path / "test.txt"
+        f.write_text("line1\nline2\n", encoding="utf-8")
+        result = get_text(str(f), encoding=None, ignore=None)
+        assert result == "line1\nline2\n"
+
+
+class TestGetInputText:
+    """Tests for the get_input_text() stdin/file dispatcher."""
+
+    def _args(
+        self, ignore_lines: str | None = None, encoding: str | None = None
+    ) -> CliArgs:
+        """Return a fresh CliArgs with ignore_lines/encoding overwritten."""
+        args = CliArgs()
+        args.ignore_lines = ignore_lines
+        args.encoding = encoding
+        return args
+
+    def test_reads_from_file(self, tmp_path: Path) -> None:
+        """Regular filename is read from disk."""
+        f = tmp_path / "input.txt"
+        f.write_text("test content", encoding="utf-8")
+        result = get_input_text(str(f), self._args())
+        assert result == "test content"
+
+    def test_reads_from_stdin(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Filename '-' reads from stdin."""
+        monkeypatch.setattr("sys.stdin", io.StringIO("stdin content"))
+        result = get_input_text("-", self._args())
+        assert result == "stdin content"
+
+    def test_stdin_with_ignore_lines(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Matching lines from stdin are suppressed when ignore_lines is set."""
+        monkeypatch.setattr("sys.stdin", io.StringIO("keep\n# skip\nkeep2\n"))
+        result = get_input_text("-", self._args(ignore_lines=r"#.*"))
+        assert "# skip" not in result
+        assert "keep" in result
+
+    def test_uses_encoding(self, tmp_path: Path) -> None:
+        """A Latin-1 file is decoded back to the original text, accent included."""
+        f = tmp_path / "latin.txt"
+        content = "caf\xe9"
+        f.write_bytes(content.encode("latin-1"))
+        result = get_input_text(str(f), self._args(encoding="latin-1"))
+        assert result == content  # exact match: the non-ASCII char must round-trip
+
+
+class TestReadProjectVersion:
+    """Tests for _read_project_version()."""
+
+    def test_reads_version_from_pyproject(self) -> None:
+        """The repository's own pyproject.toml yields a dotted version string."""
+        repo_root = Path(__file__).parents[2]
+        found = _read_project_version(repo_root / "pyproject.toml")
+        assert isinstance(found, str)
+        assert found.count(".") >= 1
diff --git a/tests/unit/test_config_unit.py b/tests/unit/test_config_unit.py
new file mode 100644
index 0000000..2005a0d
--- /dev/null
+++ b/tests/unit/test_config_unit.py
@@ -0,0 +1,235 @@
+"""Unit tests for config_file.py encoders, validators, and LanguageToolConfig."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from language_tool_python.config_file import (
+    LanguageToolConfig,
+    _bool_encoder,
+    _comma_list_encoder,
+    _encode_config,
+    _int_encoder,
+    _is_lang_key,
+    _number_encoder,
+    _path_encoder,
+    _path_validator,
+)
+from language_tool_python.exceptions import PathError
+
+
+class TestBoolEncoder:
+    """Tests for _bool_encoder(), which renders a value as 'true' or 'false'."""
+
+    def test_true(self) -> None:
+        """Passing v=True by keyword yields the string 'true'."""
+        assert _bool_encoder(v=True) == "true"
+
+    def test_false(self) -> None:
+        """Passing v=False by keyword yields the string 'false'."""
+        assert _bool_encoder(v=False) == "false"
+
+    def test_truthy_int(self) -> None:
+        """The truthy integer 1, passed positionally, is encoded as 'true'."""
+        assert _bool_encoder(1) == "true"
+
+    def test_falsy_int(self) -> None:
+        """The falsy integer 0, passed positionally, is encoded as 'false'."""
+        assert _bool_encoder(0) == "false"
+
+
+class TestIntEncoder:
+    """Tests for _int_encoder(), which renders an integer as a decimal string."""
+
+    def test_positive(self) -> None:
+        """The positive integer 42 is rendered as '42'."""
+        assert _int_encoder(42) == "42"
+
+    def test_zero(self) -> None:
+        """Zero is rendered as '0'."""
+        assert _int_encoder(0) == "0"
+
+
+class TestNumberEncoder:
+    """Tests for _number_encoder(), which renders numbers as float strings."""
+
+    def test_integer(self) -> None:
+        """The integer 5 is rendered with a trailing '.0', i.e. as '5.0'."""
+        assert _number_encoder(5) == "5.0"
+
+    def test_float(self) -> None:
+        """The float 3.14 is rendered with its decimal part intact."""
+        assert _number_encoder(3.14) == "3.14"
+
+
+class TestCommaListEncoder:
+    """Tests for _comma_list_encoder(), which accepts a string or a sequence."""
+
+    def test_string_passthrough(self) -> None:
+        """An already comma-separated string is returned unchanged."""
+        assert _comma_list_encoder("a,b,c") == "a,b,c"
+
+    def test_list_joined(self) -> None:
+        """A list of strings is joined with commas and no spaces."""
+        assert _comma_list_encoder(["a", "b", "c"]) == "a,b,c"
+
+    def test_tuple_joined(self) -> None:
+        """A tuple of strings is joined the same way as a list."""
+        assert _comma_list_encoder(("x", "y")) == "x,y"
+
+    def test_single_item(self) -> None:
+        """A single-element list yields that element with no comma added."""
+        assert _comma_list_encoder(["only"]) == "only"
+
+
+class TestPathEncoder:
+    """Tests for the _path_encoder() function."""
+
+    def test_path_object(self, tmp_path: Path) -> None:
+        """Encoding a Path keeps its final component visible in the output."""
+        encoded = _path_encoder(tmp_path / "model")
+        assert "model" in encoded
+
+    def test_backslash_escaped(self) -> None:
+        """Backslashes in a path end up doubled or replaced by forward slashes."""
+        windows_style = Path("C:\\Users\\test\\model")
+        encoded = _path_encoder(windows_style)
+        assert "/" in encoded or "\\\\" in encoded
+
+
+class TestPathValidator:
+    """Tests for the _path_validator() function."""
+
+    def test_existing_file(self, tmp_path: Path) -> None:
+        """An existing file path passes validation without error."""
+        f = tmp_path / "file.txt"
+        f.write_text("content", encoding="utf-8")  # explicit, not locale-dependent
+        _path_validator(f)
+
+    def test_existing_directory(self, tmp_path: Path) -> None:
+        """An existing directory path passes validation without error."""
+        _path_validator(tmp_path)
+
+    def test_nonexistent_raises(self, tmp_path: Path) -> None:
+        """A path that does not exist raises PathError mentioning 'does not exist'."""
+        with pytest.raises(PathError, match="does not exist"):
+            _path_validator(tmp_path / "nonexistent.txt")
+
+
+class TestIsLangKey:
+    """Tests for the _is_lang_key() predicate."""
+
+    def test_lang_code_format(self) -> None:
+        """The two-part key 'lang-en' is recognized as a language key."""
+        assert _is_lang_key("lang-en") is True
+
+    def test_lang_code_dict_path_format(self) -> None:
+        """The three-part key 'lang-en-dictPath' is recognized as a language key."""
+        assert _is_lang_key("lang-en-dictPath") is True
+
+    def test_not_lang_prefix(self) -> None:
+        """'cacheSize', which lacks the 'lang-' prefix, is not a language key."""
+        assert _is_lang_key("cacheSize") is False
+
+    def test_lang_only_no_code(self) -> None:
+        """The bare prefix 'lang-' with nothing after it is rejected."""
+        assert _is_lang_key("lang-") is False
+
+    def test_lang_too_many_parts(self) -> None:
+        """The four-part key 'lang-en-dictPath-extra' is rejected."""
+        assert _is_lang_key("lang-en-dictPath-extra") is False
+
+
+class TestEncodeConfig:
+    """Tests for the _encode_config() dict encoder."""
+
+    def test_int_option(self) -> None:
+        """An int-valued option comes back as its decimal string."""
+        encoded = _encode_config({"cacheSize": 1000})
+        assert encoded == {"cacheSize": "1000"}
+
+    def test_bool_option(self) -> None:
+        """A True option value comes back as the string 'true'."""
+        encoded = _encode_config({"pipelineCaching": True})
+        assert encoded == {"pipelineCaching": "true"}
+
+    def test_number_option(self) -> None:
+        """A float-valued option comes back as its float string."""
+        encoded = _encode_config({"maxErrorsPerWordRate": 0.5})
+        assert encoded == {"maxErrorsPerWordRate": "0.5"}
+
+    def test_list_option(self) -> None:
+        """A list-valued option comes back joined with commas."""
+        encoded = _encode_config({"blockedReferrers": ["a.com", "b.com"]})
+        assert encoded == {"blockedReferrers": "a.com,b.com"}
+
+    def test_lang_code_option(self) -> None:
+        """A 'lang-XX' entry is returned with its string value untouched."""
+        encoded = _encode_config({"lang-en": "custom-word"})
+        assert encoded == {"lang-en": "custom-word"}
+
+    def test_lang_dict_path_option(self, tmp_path: Path) -> None:
+        """A 'lang-XX-dictPath' entry pointing at an existing path is kept."""
+        encoded = _encode_config({"lang-en-dictPath": str(tmp_path)})
+        assert "lang-en-dictPath" in encoded
+
+    def test_unknown_key_raises(self) -> None:
+        """A key the encoder does not know raises ValueError."""
+        with pytest.raises(ValueError, match="unexpected key"):
+            _encode_config({"unknownKey": "value"})
+
+    def test_wrong_type_raises(self) -> None:
+        """A string given for the integer option cacheSize raises TypeError."""
+        with pytest.raises(TypeError, match="invalid type"):
+            _encode_config({"cacheSize": "not_an_int"})
+
+    def test_path_validator_called(self, tmp_path: Path) -> None:
+        """A languageModel path that does not exist raises PathError."""
+        missing_model = tmp_path / "no_such_model"
+        with pytest.raises(PathError, match="does not exist"):
+            _encode_config({"languageModel": str(missing_model)})
+
+
+class TestLanguageToolConfig:
+    """Tests for the LanguageToolConfig class."""
+
+    def test_empty_config_raises(self) -> None:
+        """An empty dict is rejected at construction time with ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            LanguageToolConfig({})
+
+    def test_valid_config_creates_file(self) -> None:
+        """A valid config exposes a non-empty .path that exists on disk."""
+        config = LanguageToolConfig({"cacheSize": 500})
+        assert config.path
+        assert Path(config.path).exists()
+
+    def test_config_file_content(self) -> None:
+        """The file at .path contains the option as a key=value pair."""
+        config = LanguageToolConfig({"cacheSize": 500})
+        written = Path(config.path).read_text(encoding="utf-8")
+        assert "cacheSize=500" in written
+
+    def test_multiple_options(self) -> None:
+        """Every option passed in shows up in the written file."""
+        config = LanguageToolConfig({"cacheSize": 100, "pipelineCaching": True})
+        written = Path(config.path).read_text(encoding="utf-8")
+        assert "cacheSize=100" in written
+        assert "pipelineCaching=true" in written
+
+    def test_config_dict_stored(self) -> None:
+        """The .config attribute holds the string-encoded options."""
+        config = LanguageToolConfig({"cacheSize": 200})
+        assert config.config == {"cacheSize": "200"}
+
+    def test_boolean_config(self) -> None:
+        """A False option is stored in .config as the string 'false'."""
+        config = LanguageToolConfig({"premiumOnly": False})
+        assert config.config == {"premiumOnly": "false"}
+
+    def test_list_config(self) -> None:
+        """A list option is stored in .config as a comma-separated string."""
+        config = LanguageToolConfig({"disabledRuleIds": ["RULE_A", "RULE_B"]})
+        assert config.config["disabledRuleIds"] == "RULE_A,RULE_B"
diff --git a/tests/unit/test_download_unit.py b/tests/unit/test_download_unit.py
new file mode 100644
index 0000000..9111496
--- /dev/null
+++ b/tests/unit/test_download_unit.py
@@ -0,0 +1,305 @@
+"""Unit tests for download_lt.py helpers (no network, no Java required).
+
+Note: test_download.py calls importlib.reload(download_lt) which invalidates
+static class imports. We access classes via the module object (updated in-place
+by reload) to ensure isinstance checks work regardless of test ordering.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+import pytest
+
+import language_tool_python.download_lt as _dl
+from language_tool_python.exceptions import PathError
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+_JAVA_8_MINOR = 8  # minor component expected from a legacy "1.8.x" version
+_JAVA_17_MAJOR = 17  # major component expected from a "17.x" version
+_JAVA_21_MAJOR = 21  # major component expected from a "21.x" version
+_SHA256_HEX_LENGTH = 64  # length of the hex hash strings used in these tests
+_KIBIBYTE = 1024  # expected int for the Content-Length string "1024"
+
+
+def return_42(_: object) -> int:
+    """Ignore the argument and return 42; patched in place of _loads_manifest."""
+    return 42
+
+
+class TestLoadsManifest:
+    """Tests for the _loads_manifest() TOML parser."""
+
+    def test_valid_toml_returns_dict(self) -> None:
+        """A small TOML document with one table parses to a dict."""
+        parsed = _dl._loads_manifest('[hashes]\n"6.8" = "abc"\n')
+        assert isinstance(parsed, dict)
+
+    def test_empty_toml(self) -> None:
+        """An empty document parses to an empty dict."""
+        parsed = _dl._loads_manifest("")
+        assert parsed == {}
+
+
+class TestLoadExpectedDownloadSha256:
+    """Tests for _load_expected_download_sha256()."""
+
+    def test_valid_manifest(self) -> None:
+        """A quoted version key mapped to a 64-character hash is returned as-is."""
+        digest = "a" * _SHA256_HEX_LENGTH
+        hashes = _dl._load_expected_download_sha256(f'"6.8" = "{digest}"\n')
+        assert hashes["6.8"] == digest
+
+    def test_non_dict_raises(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """If the parser hands back a non-dict (here 42), PathError is raised."""
+        monkeypatch.setattr(
+            "language_tool_python.download_lt._loads_manifest",
+            return_42,
+        )
+        with pytest.raises(PathError, match="expected a TOML table"):
+            _dl._load_expected_download_sha256("anything")
+
+    def test_non_string_value_raises(self) -> None:
+        """An integer where a hash string is expected raises PathError."""
+        with pytest.raises(PathError, match="expected string keys and values"):
+            _dl._load_expected_download_sha256('"6.8" = 42\n')
+
+
+class TestValidateDownloadSize:
+    """Tests for the _validate_download_size() Content-Length checker."""
+
+    def test_none_returns_none(self) -> None:
+        """A missing header, passed as None, comes back as None."""
+        assert _dl._validate_download_size(None) is None
+
+    def test_valid_size(self) -> None:
+        """The string '1024' comes back as the integer 1024."""
+        assert _dl._validate_download_size("1024") == _KIBIBYTE
+
+    def test_zero_is_valid(self) -> None:
+        """The string '0' is accepted and comes back as 0."""
+        assert _dl._validate_download_size("0") == 0
+
+    def test_invalid_string_raises(self) -> None:
+        """Text that is not a number raises PathError."""
+        with pytest.raises(PathError, match="Invalid Content-Length"):
+            _dl._validate_download_size("notanumber")
+
+    def test_negative_raises(self) -> None:
+        """The string '-1' raises PathError."""
+        with pytest.raises(PathError, match="Invalid Content-Length"):
+            _dl._validate_download_size("-1")
+
+    def test_too_large_raises(self) -> None:
+        """One byte more than 512 MiB is refused with PathError."""
+        with pytest.raises(PathError, match="Refusing to download"):
+            _dl._validate_download_size(str(512 * 1024**2 + 1))
+
+
+class TestParseJavaVersion:
+    """Tests for _parse_java_version() version string parsing."""
+
+    def test_old_format_quoted(self) -> None:
+        """A legacy 'java version "1.8.0_N"' banner parses to major 1, minor 8."""
+        banner = 'java version "1.8.0_292"'
+        major, minor = _dl._parse_java_version(banner)
+        assert major == 1
+        assert minor == _JAVA_8_MINOR
+
+    def test_new_format_17(self) -> None:
+        """An unquoted 'openjdk 17.0.1' banner parses to major 17, minor 0."""
+        banner = "openjdk 17.0.1 2021-10-19"
+        major, minor = _dl._parse_java_version(banner)
+        assert major == _JAVA_17_MAJOR
+        assert minor == 0
+
+    def test_new_format_21(self) -> None:
+        """A quoted 'openjdk version "21.0.2"' banner parses to major 21."""
+        banner = 'openjdk version "21.0.2" 2024-01-16'
+        major, _ = _dl._parse_java_version(banner)
+        assert major == _JAVA_21_MAJOR
+
+    def test_unparseable_raises(self) -> None:
+        """Text containing no version at all ends in SystemExit."""
+        with pytest.raises(SystemExit, match="Could not parse"):
+            _dl._parse_java_version("not a java version string")
+
+    def test_multiline_output(self) -> None:
+        """A three-line java -version style output still parses to major 21."""
+        banner = (
+            'openjdk version "21.0.2" 2024-01-16\n'
+            "OpenJDK Runtime Environment (build 21.0.2+13)\n"
+            "OpenJDK 64-Bit Server VM (build 21.0.2+13, mixed mode, sharing)\n"
+        )
+        major, _ = _dl._parse_java_version(banner)
+        assert major == _JAVA_21_MAJOR
+
+
+class TestLocalLanguageToolFromVersionName:
+    """Tests for LocalLanguageTool.from_version_name() factory method."""
+
+    def test_release_version(self) -> None:
+        """The version name '6.8' produces a ReleaseLocalLanguageTool."""
+        tool = _dl.LocalLanguageTool.from_version_name("6.8")
+        assert isinstance(tool, _dl.ReleaseLocalLanguageTool)
+
+    def test_snapshot_date_version(self) -> None:
+        """The eight-digit name '20240101' produces a SnapshotLocalLanguageTool."""
+        tool = _dl.LocalLanguageTool.from_version_name("20240101")
+        assert isinstance(tool, _dl.SnapshotLocalLanguageTool)
+
+    def test_snapshot_latest(self) -> None:
+        """The name 'latest' produces a SnapshotLocalLanguageTool."""
+        tool = _dl.LocalLanguageTool.from_version_name("latest")
+        assert isinstance(tool, _dl.SnapshotLocalLanguageTool)
+
+    def test_unknown_format_raises(self) -> None:
+        """A name in no recognized format raises ValueError."""
+        with pytest.raises(ValueError, match="Unknown LanguageTool version"):
+            _dl.LocalLanguageTool.from_version_name("unknown-format")
+
+    def test_default_version(self) -> None:
+        """With no argument, the factory produces a ReleaseLocalLanguageTool."""
+        tool = _dl.LocalLanguageTool.from_version_name()
+        assert isinstance(tool, _dl.ReleaseLocalLanguageTool)
+
+
+class TestLocalLanguageToolFromPath:
+    """Tests for LocalLanguageTool.from_path() directory-name parser."""
+
+    def test_valid_release_path(self, tmp_path: Path) -> None:
+        """A path ending in 'LanguageTool-6.8' gives a ReleaseLocalLanguageTool."""
+        install_dir = tmp_path / "LanguageTool-6.8"
+        tool = _dl.LocalLanguageTool.from_path(install_dir)
+        assert isinstance(tool, _dl.ReleaseLocalLanguageTool)
+
+    def test_valid_snapshot_path(self, tmp_path: Path) -> None:
+        """A path ending in 'LanguageTool-20240101' gives a snapshot instance."""
+        install_dir = tmp_path / "LanguageTool-20240101"
+        tool = _dl.LocalLanguageTool.from_path(install_dir)
+        assert isinstance(tool, _dl.SnapshotLocalLanguageTool)
+
+    def test_invalid_path_raises(self, tmp_path: Path) -> None:
+        """A directory name that does not follow the pattern raises ValueError."""
+        install_dir = tmp_path / "not-a-lt-dir"
+        with pytest.raises(ValueError, match="Could not determine"):
+            _dl.LocalLanguageTool.from_path(install_dir)
+
+
+class TestReleaseLocalLanguageTool:
+    """Tests for ReleaseLocalLanguageTool attributes and ordering."""
+
+    def test_version_name(self) -> None:
+        """version_name echoes the string passed to the constructor."""
+        tool = _dl.ReleaseLocalLanguageTool("6.8")
+        assert tool.version_name == "6.8"
+
+    def test_eq(self) -> None:
+        """Two instances built from the same version compare equal."""
+        first = _dl.ReleaseLocalLanguageTool("6.8")
+        second = _dl.ReleaseLocalLanguageTool("6.8")
+        assert first == second
+
+    def test_neq(self) -> None:
+        """Instances built from '6.8' and '6.7' compare unequal."""
+        first = _dl.ReleaseLocalLanguageTool("6.8")
+        second = _dl.ReleaseLocalLanguageTool("6.7")
+        assert first != second
+
+    def test_lt(self) -> None:
+        """Version '6.7' sorts below version '6.8'."""
+        older = _dl.ReleaseLocalLanguageTool("6.7")
+        newer = _dl.ReleaseLocalLanguageTool("6.8")
+        assert older < newer
+
+    def test_hash(self) -> None:
+        """Instances that compare equal also hash equal."""
+        first = _dl.ReleaseLocalLanguageTool("6.8")
+        second = _dl.ReleaseLocalLanguageTool("6.8")
+        assert hash(first) == hash(second)
+
+    def test_in_set(self) -> None:
+        """A set built from two equal instances holds a single element."""
+        s = {_dl.ReleaseLocalLanguageTool("6.8"), _dl.ReleaseLocalLanguageTool("6.8")}
+        assert len(s) == 1
+
+    def test_download_url_new_version(self) -> None:
+        """download_url for version '6.8' embeds the text '6.8'."""
+        tool = _dl.ReleaseLocalLanguageTool("6.8")
+        assert "6.8" in tool.download_url
+
+    def test_download_url_old_version_uses_archive(self) -> None:
+        """download_url for the old version '4.0' embeds the text '4.0'."""
+        tool = _dl.ReleaseLocalLanguageTool("4.0")
+        assert "4.0" in tool.download_url
+
+
+class TestSnapshotLocalLanguageTool:
+    """Tests for SnapshotLocalLanguageTool attributes and equality."""
+
+    def test_version_name_date(self) -> None:
+        """A date-format version name is stored as-is."""
+        lt = _dl.SnapshotLocalLanguageTool("20240101")
+        assert lt.version_name == "20240101"
+
+    def test_version_name_latest_expands_to_date(self) -> None:
+        """'latest' expands to exactly eight digits (a trailing newline would fail)."""
+        lt = _dl.SnapshotLocalLanguageTool("latest")
+        assert re.fullmatch(r"\d{8}", lt.version_name)
+
+    def test_eq(self) -> None:
+        """Two instances with the same date are equal."""
+        a = _dl.SnapshotLocalLanguageTool("20240101")
+        b = _dl.SnapshotLocalLanguageTool("20240101")
+        assert a == b
+
+    def test_neq(self) -> None:
+        """Instances with different dates are not equal."""
+        a = _dl.SnapshotLocalLanguageTool("20240101")
+        b = _dl.SnapshotLocalLanguageTool("20240201")
+        assert a != b
+
+    def test_hash(self) -> None:
+        """Equal instances produce the same hash."""
+        a = _dl.SnapshotLocalLanguageTool("20240101")
+        b = _dl.SnapshotLocalLanguageTool("20240101")
+        assert hash(a) == hash(b)
+
+
+class TestGetZipHash:
+    """Tests for _get_zip_hash() SHA-256 lookup."""
+
+    def test_bypass_env_returns_none_with_warning(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """LTP_BYPASS_VERIFIED_DOWNLOADS=true skips verification with a warning."""
+        monkeypatch.setenv("LTP_BYPASS_VERIFIED_DOWNLOADS", "true")
+        with pytest.warns(RuntimeWarning, match="bypassed"):
+            result = _dl._get_zip_hash("6.8")
+        assert result is None
+
+    def test_known_version_returns_hash(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """A version present in the integrity manifest returns a 64-char hex hash."""
+        if not _dl._EXPECTED_DOWNLOAD_SHA256:
+            pytest.skip("No known hashes in manifest")
+        monkeypatch.delenv("LTP_BYPASS_VERIFIED_DOWNLOADS", raising=False)
+        result = _dl._get_zip_hash(next(iter(_dl._EXPECTED_DOWNLOAD_SHA256)))
+        assert result is not None
+        assert len(result) == _SHA256_HEX_LENGTH
+
+    def test_unknown_version_returns_none(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """With no env overrides, a version absent from the manifest returns None."""
+        monkeypatch.delenv("LTP_BYPASS_VERIFIED_DOWNLOADS", raising=False)
+        monkeypatch.delenv("LTP_DOWNLOAD_SHA256", raising=False)
+        assert _dl._get_zip_hash("0.0") is None
+
+    def test_invalid_hash_in_env_raises(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """An invalid SHA-256 value in LTP_DOWNLOAD_SHA256 raises PathError."""
+        monkeypatch.setenv("LTP_DOWNLOAD_SHA256", "not-a-valid-sha256")
+        with pytest.raises(PathError, match="Invalid SHA-256"):
+            _dl._get_zip_hash("6.8")
diff --git a/tests/unit/test_internals_utils.py b/tests/unit/test_internals_utils.py
new file mode 100644
index 0000000..7a41cb3
--- /dev/null
+++ b/tests/unit/test_internals_utils.py
@@ -0,0 +1,224 @@
+"""Unit tests for language_tool_python._internals.utils."""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+import time
+from typing import TYPE_CHECKING
+
+import psutil
+import pytest
+
+from language_tool_python._internals.utils import (
+    get_env_float,
+    get_env_int,
+    get_language_tool_download_path,
+    get_locale_language,
+    kill_process_force,
+    parse_url,
+    version_tuple,
+)
+from language_tool_python.exceptions import PathError
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+_DEFAULT_INT = 42  # default passed to get_env_int() when the variable is unset
+_ENV_INT_VALUE = 100  # value placed in the environment for get_env_int() to read
+_DEFAULT_FLOAT = 1.5  # default passed to get_env_float() when the variable is unset
+
+
+class TestParseUrl:
+    """Tests for parse_url() scheme normalisation."""
+
+    def test_full_url_unchanged(self) -> None:
+        """An http URL with host and port comes back unchanged."""
+        assert parse_url("http://localhost:8081") == "http://localhost:8081"
+
+    def test_https_url_unchanged(self) -> None:
+        """An https URL comes back unchanged."""
+        assert parse_url("https://example.com") == "https://example.com"
+
+    def test_adds_http_scheme(self) -> None:
+        """A bare host:port string gains an http:// prefix and keeps its host."""
+        normalized = parse_url("localhost:8081")
+        assert normalized.startswith("http://")
+        assert "localhost" in normalized
+
+    def test_canonical_form(self) -> None:
+        """A complete URL ending in a slash comes back unchanged."""
+        assert parse_url("http://localhost:8081/") == "http://localhost:8081/"
+
+
+class TestGetEnvInt:
+    """Tests for get_env_int() environment variable reader."""
+
+    def test_returns_default_when_absent(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """With TEST_INT_VAR unset, the supplied default is returned."""
+        monkeypatch.delenv("TEST_INT_VAR", raising=False)
+        assert get_env_int("TEST_INT_VAR", _DEFAULT_INT) == _DEFAULT_INT
+
+    def test_reads_valid_value(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """An integer string in the environment is returned instead of the default."""
+        monkeypatch.setenv("TEST_INT_VAR", str(_ENV_INT_VALUE))
+        assert get_env_int("TEST_INT_VAR", 0) == _ENV_INT_VALUE
+
+    def test_raises_on_non_integer(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """A non-numeric value raises PathError mentioning 'Invalid integer'."""
+        monkeypatch.setenv("TEST_INT_VAR", "notanint")
+        with pytest.raises(PathError, match="Invalid integer"):
+            get_env_int("TEST_INT_VAR", 0)
+
+    def test_raises_on_zero(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The value '0' in the environment is rejected with PathError."""
+        monkeypatch.setenv("TEST_INT_VAR", "0")
+        with pytest.raises(PathError, match="Invalid integer"):
+            get_env_int("TEST_INT_VAR", 0)
+
+    def test_raises_on_negative(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The value '-5' in the environment is rejected with PathError."""
+        monkeypatch.setenv("TEST_INT_VAR", "-5")
+        with pytest.raises(PathError, match="Invalid integer"):
+            get_env_int("TEST_INT_VAR", 0)
+
+
+class TestGetEnvFloat:
+    """Tests for get_env_float() environment variable reader."""
+
+    def test_returns_default_when_absent(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """With TEST_FLOAT_VAR unset, the supplied default is returned."""
+        monkeypatch.delenv("TEST_FLOAT_VAR", raising=False)
+        assert get_env_float("TEST_FLOAT_VAR", _DEFAULT_FLOAT) == _DEFAULT_FLOAT
+
+    def test_reads_valid_value(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The string '3.14' in the environment is returned as roughly 3.14."""
+        monkeypatch.setenv("TEST_FLOAT_VAR", "3.14")
+        assert get_env_float("TEST_FLOAT_VAR", 0.0) == pytest.approx(3.14)
+
+    def test_raises_on_non_float(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """A non-numeric value raises PathError mentioning 'Invalid float'."""
+        monkeypatch.setenv("TEST_FLOAT_VAR", "notafloat")
+        with pytest.raises(PathError, match="Invalid float"):
+            get_env_float("TEST_FLOAT_VAR", 0.0)
+
+    def test_raises_on_zero(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The value '0.0' in the environment is rejected with PathError."""
+        monkeypatch.setenv("TEST_FLOAT_VAR", "0.0")
+        with pytest.raises(PathError, match="Invalid float"):
+            get_env_float("TEST_FLOAT_VAR", 1.0)
+
+    def test_raises_on_negative(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The value '-1.0' in the environment is rejected with PathError."""
+        monkeypatch.setenv("TEST_FLOAT_VAR", "-1.0")
+        with pytest.raises(PathError, match="Invalid float"):
+            get_env_float("TEST_FLOAT_VAR", 1.0)
+
+    def test_raises_on_inf(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The value 'inf' in the environment is rejected with PathError."""
+        monkeypatch.setenv("TEST_FLOAT_VAR", "inf")
+        with pytest.raises(PathError, match="Invalid float"):
+            get_env_float("TEST_FLOAT_VAR", 1.0)
+
+
+class TestGetLanguageToolDownloadPath:
+    """Tests for get_language_tool_download_path() path resolver."""
+
+    def test_returns_path(
+        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+    ) -> None:
+        """With LTP_PATH set to an existing directory, the result is a directory."""
+        monkeypatch.setenv("LTP_PATH", str(tmp_path))
+        resolved = get_language_tool_download_path()
+        assert resolved.exists()
+        assert resolved.is_dir()
+
+    def test_creates_directory(
+        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+    ) -> None:
+        """With LTP_PATH naming a missing directory, the returned path exists."""
+        target = tmp_path / "new_subdir"
+        monkeypatch.setenv("LTP_PATH", str(target))
+        resolved = get_language_tool_download_path()
+        assert resolved.exists()
+
+    def test_default_path_in_home(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """With LTP_PATH unset, the result mentions 'language_tool_python'."""
+        monkeypatch.delenv("LTP_PATH", raising=False)
+        resolved = get_language_tool_download_path()
+        assert "language_tool_python" in str(resolved)
+
+
+class TestGetLocaleLanguage:
+    """Tests for get_locale_language() system locale lookup."""
+
+    def test_returns_string(self) -> None:
+        """The lookup yields a str that is not empty."""
+        language = get_locale_language()
+        assert isinstance(language, str)
+        assert language != ""
+
+
+class TestKillProcessForce:
+    """Tests for kill_process_force() process terminator."""
+
+    def test_raises_when_no_args(self) -> None:
+        """Omitting both pid and proc is rejected with ValueError."""
+        with pytest.raises(ValueError, match="Must pass either pid or proc"):
+            kill_process_force()
+
+    def test_kills_by_pid(self) -> None:
+        """A sleeping child exits within the wait timeout once killed by pid."""
+        sleeper = subprocess.Popen(
+            [sys.executable, "-c", "import time; time.sleep(60)"],
+        )
+        kill_process_force(pid=sleeper.pid)
+        sleeper.wait(timeout=5)
+
+    def test_kills_by_proc(self) -> None:
+        """A sleeping child exits once killed via its psutil.Process handle."""
+        sleeper = subprocess.Popen(
+            [sys.executable, "-c", "import time; time.sleep(60)"],
+        )
+        handle = psutil.Process(sleeper.pid)
+        kill_process_force(proc=handle)
+        sleeper.wait(timeout=5)
+
+    def test_kills_process_with_children(self) -> None:
+        """A parent that spawned its own sleeping child exits once killed by pid."""
+        spawner = subprocess.Popen(
+            [
+                sys.executable,
+                "-c",
+                (
+                    "import subprocess, sys, time; "
+                    "subprocess.Popen([sys.executable, '-c', "
+                    "'import time; time.sleep(60)']); "
+                    "time.sleep(60)"
+                ),
+            ],
+        )
+        time.sleep(0.3)  # presumably gives the parent time to spawn its child
+        kill_process_force(pid=spawner.pid)
+        spawner.wait(timeout=10)
+
+    def test_nonexistent_pid_is_silent(self) -> None:
+        """Passing a pid that is not expected to exist raises nothing."""
+        kill_process_force(pid=999999999)
+
+
+class TestVersionTuple:
+    """Tests for version_tuple() version string parser."""
+
+    def test_parses_version(self) -> None:
+        """The string '6.8' is parsed to the int tuple (6, 8)."""
+        assert version_tuple("6.8") == (6, 8)
+
+    def test_parses_version_with_zeros(self) -> None:
+        """The string '4.0' is parsed to (4, 0), keeping the zero minor."""
+        assert version_tuple("4.0") == (4, 0)
+
+    def test_raises_on_invalid_format(self) -> None:
+        """A string with no dot raises ValueError ('not enough values')."""
+        with pytest.raises(ValueError, match="not enough values"):
+            version_tuple("invalid")
diff --git a/tests/unit/test_language_tag.py b/tests/unit/test_language_tag.py
new file mode 100644
index 0000000..7ac4d3e
--- /dev/null
+++ b/tests/unit/test_language_tag.py
@@ -0,0 +1,168 @@
+"""Unit tests for LanguageTag normalization and comparison."""
+
+import pytest
+
+from language_tool_python.language_tag import LanguageTag
+
+_LANGS = ["en-US", "en-GB", "en", "de-DE", "fr-FR", "pt-BR"]
+
+_SET_SIZE_TWO = 2
+
+
+def _tag(tag: str, languages: list[str] = _LANGS) -> LanguageTag:
+    """Construct a LanguageTag against _LANGS by default."""
+    return LanguageTag(tag, languages)
+
+
+class TestInit:
+    """Tests for basic LanguageTag initialization and normalization."""
+
+    def test_exact_match(self) -> None:
+        """An exact match in the language list is returned unchanged."""
+        lt = _tag("en-US")
+        assert lt.normalized_tag == "en-US"
+
+    def test_underscore_normalized_to_dash(self) -> None:
+        """Underscore locale separators are converted to dashes."""
+        lt = _tag("en_US")
+        assert lt.normalized_tag == "en-US"
+
+    def test_case_insensitive(self) -> None:
+        """Tag lookup is case-insensitive."""
+        lt = _tag("EN-us")
+        assert lt.normalized_tag == "en-US"
+
+    def test_tag_stored(self) -> None:
+        """The original (pre-normalization) tag is preserved."""
+        lt = _tag("en-US")
+        assert lt.tag == "en-US"
+
+    def test_languages_stored(self) -> None:
+        """The language list is accessible on the tag object."""
+        lt = _tag("en-US")
+        assert "en-US" in lt.languages
+
+
+class TestNormalizePosix:
+    """Tests for POSIX/C locale fallback behaviour."""
+
+    def test_c_locale_falls_back_to_en_us(self) -> None:
+        """'C' locale resolves to en-US when available."""
+        lt = _tag("C")
+        assert lt.normalized_tag == "en-US"
+
+    def test_posix_locale_falls_back_to_en_us(self) -> None:
+        """'POSIX' locale resolves to en-US when available."""
+        lt = _tag("POSIX")
+        assert lt.normalized_tag == "en-US"
+
+    def test_c_dot_variant(self) -> None:
+        """'C.UTF-8' resolves to en-US when available."""
+        lt = _tag("C.UTF-8")
+        assert lt.normalized_tag == "en-US"
+
+    def test_posix_prefers_en_gb_when_no_en_us(self) -> None:
+        """'C' locale falls back to en-GB when en-US is absent."""
+        lt = LanguageTag("C", ["en-GB", "fr-FR"])
+        assert lt.normalized_tag == "en-GB"
+
+    def test_posix_falls_to_en_when_no_en_us_or_gb(self) -> None:
+        """'C' locale falls back to bare 'en' when no regional variant exists."""
+        lt = LanguageTag("C", ["en", "fr-FR"])
+        assert lt.normalized_tag == "en"
+
+    def test_posix_raises_when_no_english(self) -> None:
+        """'C' locale raises ValueError when no English variant is available."""
+        with pytest.raises(ValueError, match="unsupported language"):
+            LanguageTag("C", ["de-DE", "fr-FR"])
+
+
+class TestNormalizeFallback:
+    """Tests for regex-based region-stripping fallback."""
+
+    def test_language_only_matches_base(self) -> None:
+        """A bare language code matches the base language entry."""
+        lt = _tag("en")
+        assert lt.normalized_tag == "en"
+
+    def test_exact_regional_tag_skips_fallback(self) -> None:
+        """An exact regional match is returned without region stripping."""
+        lt = _tag("pt-BR")
+        assert lt.normalized_tag == "pt-BR"
+
+    def test_regex_fallback_strips_region(self) -> None:
+        """A tag with an unavailable region falls back to the base language."""
+        lt = LanguageTag("en-AU", ["en", "de-DE"])
+        assert lt.normalized_tag == "en"
+
+    def test_empty_tag_raises(self) -> None:
+        """An empty tag string raises ValueError."""
+        with pytest.raises(ValueError, match="empty language tag"):
+            _tag("")
+
+    def test_unsupported_tag_raises(self) -> None:
+        """A tag with no match raises ValueError."""
+        with pytest.raises(ValueError, match="unsupported language"):
+            _tag("zz-ZZ")
+
+    def test_unmatched_pattern_raises(self) -> None:
+        """A non-language-like string raises ValueError."""
+        with pytest.raises(ValueError, match="unsupported language"):
+            _tag("123invalid")
+
+
+class TestComparisons:
+    """Tests for LanguageTag equality, ordering, and hashing."""
+
+    def test_eq_same_tag(self) -> None:
+        """Two tags with the same value are equal."""
+        assert _tag("en-US") == _tag("en-US")
+
+    def test_eq_with_string(self) -> None:
+        """A LanguageTag equals its normalized string."""
+        assert _tag("en-US") == "en-US"
+
+    def test_eq_not_equal(self) -> None:
+        """Tags with different values are not equal."""
+        assert _tag("en-US") != _tag("de-DE")
+
+    def test_eq_not_implemented_for_non_str(self) -> None:
+        """Comparing with a non-string returns NotImplemented."""
+        assert _tag("en-US").__eq__(42) is NotImplemented
+
+    def test_lt_ordering(self) -> None:
+        """Tags are ordered lexicographically by their normalized value."""
+        assert _tag("de-DE") < _tag("en-US")
+
+    def test_lt_not_implemented_for_non_str(self) -> None:
+        """Less-than comparison with a non-string returns NotImplemented."""
+        assert _tag("en-US").__lt__(42) is NotImplemented
+
+    def test_hash_equal_tags(self) -> None:
+        """Equal tags produce the same hash."""
+        assert hash(_tag("en-US")) == hash(_tag("en-US"))
+
+    def test_hash_different_tags(self) -> None:
+        """Different tags produce different hashes (high probability)."""
+        assert hash(_tag("en-US")) != hash(_tag("de-DE"))
+
+    def test_in_set(self) -> None:
+        """Two distinct tags result in a two-element set."""
+        s = {_tag("en-US"), _tag("de-DE")}
+        assert len(s) == _SET_SIZE_TWO
+
+
+class TestStrRepr:
+    """Tests for LanguageTag string representations."""
+
+    def test_str_returns_normalized(self) -> None:
+        """str() returns the normalized tag."""
+        assert str(_tag("en-US")) == "en-US"
+
+    def test_repr_format(self) -> None:
+        """repr() uses the canonical angle-bracket format."""
+        assert repr(_tag("en-US")) == '<LanguageTag "en-US">'
+
+    def test_total_ordering_gt(self) -> None:
+        """Greater-than comparison works via total_ordering."""
+        assert _tag("en-US") > _tag("de-DE")
diff --git a/tests/unit/test_match.py b/tests/unit/test_match.py
new file mode 100644
index 0000000..fdccbc8
--- /dev/null
+++ b/tests/unit/test_match.py
@@ -0,0 +1,421 @@
+"""Unit tests for the Match class and related helpers."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from language_tool_python.match import (
+    Match,
+    _four_byte_char_positions,
+    _get_match_ordered_dict,
+    is_check_match,
+)
+
+if TYPE_CHECKING:
+    from language_tool_python._internals.api_types import CheckMatch
+
+_DEFAULT_OFFSET = 8
+_DEFAULT_LENGTH = 4
+_DEFAULT_CONTEXT_OFFSET = 8
+_NUM_MATCH_FIELDS = 10
+
+
+def _make_attrib(  # noqa: PLR0913
+    *,
+    message: str = "Possible spelling mistake.",
+    short_message: str = "Spelling mistake",
+    replacements: list[str] | None = None,
+    offset: int = _DEFAULT_OFFSET,
+    length: int = _DEFAULT_LENGTH,
+    context_text: str = "This is noot okay.",
+    context_offset: int = _DEFAULT_CONTEXT_OFFSET,
+    sentence: str = "This is noot okay.",
+    rule_id: str = "MORFOLOGIK_RULE_EN_US",
+    rule_desc: str = "Possible spelling mistake",
+    issue_type: str = "misspelling",
+    category_id: str = "TYPOS",
+    category_name: str = "Possible Typo",
+) -> CheckMatch:
+    repl_list: list[str] = replacements if replacements is not None else ["not", "noon"]
+    return {
+        "message": message,
+        "shortMessage": short_message,
+        "replacements": [{"value": r} for r in repl_list],
+        "offset": offset,
+        "length": length,
+        "context": {"text": context_text, "offset": context_offset, "length": length},
+        "sentence": sentence,
+        "type": {"typeName": "Other"},
+        "rule": {
+            "id": rule_id,
+            "description": rule_desc,
+            "issueType": issue_type,
+            "category": {"id": category_id, "name": category_name},
+        },
+        "ignoreForIncompleteSentence": False,
+        "contextForSureMatch": 0,
+    }
+
+
+def _make_match(text: str = "This is noot okay.", **kwargs: object) -> Match:
+    return Match(_make_attrib(**kwargs), text)  # type: ignore[arg-type]
+
+
+class TestMatchInit:
+    """Tests for Match.__init__() attribute mapping."""
+
+    def test_basic_attributes(self) -> None:
+        """Default attributes are populated correctly from the attrib dict."""
+        m = _make_match()
+        assert m.rule_id == "MORFOLOGIK_RULE_EN_US"
+        assert m.message == "Possible spelling mistake."
+        assert m.replacements == ["not", "noon"]
+        assert m.offset == _DEFAULT_OFFSET
+        assert m.error_length == _DEFAULT_LENGTH
+        assert m.category == "TYPOS"
+        assert m.rule_issue_type == "misspelling"
+        assert m.sentence == "This is noot okay."
+
+    def test_context_attributes(self) -> None:
+        """Context text and offset are set from the nested context dict."""
+        m = _make_match()
+        assert m.context == "This is noot okay."
+        assert m.offset_in_context == _DEFAULT_CONTEXT_OFFSET
+
+    def test_unicode_normalization(self) -> None:
+        """Message text is NFKC-normalized on construction."""
+        # "ﬁ" (U+FB01 LATIN SMALL LIGATURE FI) → "fi"
+        m = _make_match(message="ﬁnd the error")
+        assert m.message == "find the error"
+
+    def test_four_byte_char_adjustment(self) -> None:
+        """A 4-byte emoji before the match shifts the Python offset by 1."""
+        # "🌅" at position 0 is 1 Python char but 2 Java chars
+        # Java offset 3 → Python offset 2 ("🌅 he" → 'h' is at index 2)
+        text = "🌅 hello world"
+        attrib = _make_attrib(
+            offset=3,
+            length=5,
+            context_text="🌅 hello world",
+            context_offset=3,
+            sentence="🌅 hello world",
+        )
+        m = Match(attrib, text)
+        adjusted_offset = 2
+        assert m.offset == adjusted_offset
+
+    def test_no_adjustment_without_four_byte_chars(self) -> None:
+        """Offsets are unchanged when no 4-byte characters precede the match."""
+        text = "Hello world today"
+        expected_offset = 6
+        m = Match(
+            _make_attrib(
+                offset=expected_offset,
+                length=5,
+                context_text=text,
+                context_offset=expected_offset,
+                sentence=text,
+            ),
+            text,
+        )
+        assert m.offset == expected_offset
+
+    def test_same_text_reuses_cache(self) -> None:
+        """Two matches on the same text share the cached position list."""
+        text = "Same text here."
+        explicit_offset = 5
+        m1 = Match(_make_attrib(context_text=text, sentence=text), text)
+        m2 = Match(
+            _make_attrib(
+                context_text=text,
+                sentence=text,
+                offset=explicit_offset,
+                length=_DEFAULT_LENGTH,
+                context_offset=explicit_offset,
+            ),
+            text,
+        )
+        assert text == Match.PREVIOUS_MATCHES_TEXT
+        assert m1.offset == _DEFAULT_OFFSET
+        assert m2.offset == explicit_offset
+
+
+class TestFourByteCharPositions:
+    """Tests for _four_byte_char_positions() helper."""
+
+    def test_empty_string(self) -> None:
+        """An empty string has no 4-byte char positions."""
+        assert _four_byte_char_positions("") == []
+
+    def test_ascii_only(self) -> None:
+        """A pure-ASCII string has no 4-byte char positions."""
+        assert _four_byte_char_positions("hello") == []
+
+    def test_emoji_at_start(self) -> None:
+        """An emoji at position 0 is reported at index 0."""
+        assert _four_byte_char_positions("🌅abc") == [0]
+
+    def test_multiple_emojis(self) -> None:
+        """Two consecutive emojis are reported at their UTF-16 (Java) indices."""
+        positions = _four_byte_char_positions("🌅🎉abc")
+        assert positions == [0, 2]
+
+    def test_emoji_in_middle(self) -> None:
+        """An emoji in the middle of ASCII text is reported at the correct index."""
+        positions = _four_byte_char_positions("ab🌅cd")
+        assert positions == [2]
+
+
+class TestMatchOrderedDict:
+    """Tests for _get_match_ordered_dict() field-type registry."""
+
+    def test_returns_all_keys(self) -> None:
+        """All expected field names are returned in order."""
+        d = _get_match_ordered_dict()
+        expected_keys = [
+            "rule_id",
+            "message",
+            "replacements",
+            "offset_in_context",
+            "context",
+            "offset",
+            "error_length",
+            "category",
+            "rule_issue_type",
+            "sentence",
+        ]
+        assert list(d.keys()) == expected_keys
+
+    def test_value_types(self) -> None:
+        """Field types are the expected Python built-ins."""
+        d = _get_match_ordered_dict()
+        assert d["offset"] is int
+        assert d["rule_id"] is str
+        assert d["replacements"] is list
+
+
+class TestIsCheckMatch:
+    """Tests for the is_check_match() type-guard."""
+
+    def test_valid_check_match(self) -> None:
+        """A fully populated attrib dict is recognised as a CheckMatch."""
+        assert is_check_match(_make_attrib())
+
+    def test_not_dict(self) -> None:
+        """Non-dict values are rejected."""
+        assert not is_check_match("not a dict")
+        assert not is_check_match(None)
+        assert not is_check_match(42)
+
+    def test_missing_field(self) -> None:
+        """A dict missing a required field is rejected."""
+        attrib = dict(_make_attrib())
+        del attrib["message"]
+        assert not is_check_match(attrib)
+
+    def test_wrong_type(self) -> None:
+        """A dict with a field of the wrong type is rejected."""
+        attrib = dict(_make_attrib())
+        attrib["offset"] = "not_an_int"
+        assert not is_check_match(attrib)
+
+
+class TestMatchStr:
+    """Tests for Match.__str__() human-readable formatter."""
+
+    def test_str_contains_rule_id(self) -> None:
+        """The rule ID is present in the string representation."""
+        m = _make_match()
+        s = str(m)
+        assert "MORFOLOGIK_RULE_EN_US" in s
+
+    def test_str_contains_message(self) -> None:
+        """The error message is present in the string representation."""
+        m = _make_match()
+        assert "Possible spelling mistake" in str(m)
+
+    def test_str_contains_suggestions(self) -> None:
+        """Replacement suggestions are present in the string representation."""
+        m = _make_match()
+        assert "not" in str(m)
+
+    def test_str_no_message_skips_message_line(self) -> None:
+        """A match with no message omits the Message line."""
+        m = _make_match(message="")
+        assert "Message" not in str(m)
+
+    def test_str_no_replacements_skips_suggestion(self) -> None:
+        """A match with no replacements omits the Suggestion line."""
+        m = _make_match(replacements=[])
+        assert "Suggestion" not in str(m)
+
+
+class TestMatchRepr:
+    """Tests for Match.__repr__() machine-readable formatter."""
+
+    def test_repr_contains_class_name(self) -> None:
+        """The class name 'Match(' appears in the repr."""
+        m = _make_match()
+        assert "Match(" in repr(m)
+
+    def test_repr_contains_rule_id(self) -> None:
+        """The rule ID appears in the repr."""
+        m = _make_match()
+        assert "MORFOLOGIK_RULE_EN_US" in repr(m)
+
+
+class TestMatchedText:
+    """Tests for the matched_text property."""
+
+    def test_matched_text_extracts_correctly(self) -> None:
+        """matched_text returns the exact text slice at offset/length."""
+        m = _make_match()
+        assert m.matched_text == "noot"
+
+
+class TestGetLineAndColumn:
+    """Tests for Match.get_line_and_column()."""
+
+    def test_single_line(self) -> None:
+        """A single-line text returns line 1 and a positive column."""
+        text = "This is noot okay."
+        m = _make_match(text=text)
+        line, col = m.get_line_and_column(text)
+        assert line == 1
+        assert col > 0
+
+    def test_context_not_in_text_raises(self) -> None:
+        """Passing unrelated text raises ValueError."""
+        m = _make_match()
+        with pytest.raises(ValueError, match="does not match the context"):
+            m.get_line_and_column("completely different text here blah blah")
+
+
+class TestSelectReplacement:
+    """Tests for Match.select_replacement() replacement narrower."""
+
+    def test_select_valid_index(self) -> None:
+        """Selecting index 1 keeps only the second replacement."""
+        m = _make_match()
+        m.select_replacement(1)
+        assert m.replacements == ["noon"]
+
+    def test_select_first(self) -> None:
+        """Selecting index 0 keeps only the first replacement."""
+        m = _make_match()
+        m.select_replacement(0)
+        assert m.replacements == ["not"]
+
+    def test_negative_index_raises(self) -> None:
+        """A negative index raises ValueError."""
+        m = _make_match()
+        with pytest.raises(ValueError, match="numbered from 0"):
+            m.select_replacement(-1)
+
+    def test_out_of_bounds_raises(self) -> None:
+        """An out-of-range index raises ValueError."""
+        m = _make_match()
+        with pytest.raises(ValueError, match="numbered from 0"):
+            m.select_replacement(99)
+
+    def test_no_replacements_raises(self) -> None:
+        """Selecting when there are no replacements raises ValueError."""
+        m = _make_match(replacements=[])
+        with pytest.raises(ValueError, match="no suggestions"):
+            m.select_replacement(0)
+
+
+class TestMatchComparisons:
+    """Tests for Match equality, ordering, and NotImplemented handling."""
+
+    def test_eq_equal_matches(self) -> None:
+        """Two matches built from the same attrib dict are equal."""
+        m1 = _make_match()
+        m2 = _make_match()
+        assert m1 == m2
+
+    def test_eq_different_offset(self) -> None:
+        """Matches with different offsets are not equal."""
+        m1 = _make_match()
+        m2 = _make_match(offset=0, context_offset=0)
+        assert m1 != m2
+
+    def test_eq_not_implemented_for_non_match(self) -> None:
+        """Comparing a Match with a non-Match returns NotImplemented."""
+        m = _make_match()
+        assert m.__eq__("not a match") is NotImplemented
+
+    def test_lt(self) -> None:
+        """A match at an earlier offset is less than one at a later offset."""
+        text = "This is noot okay, and also baaad."
+        m_early = Match(
+            _make_attrib(
+                offset=0,
+                length=_DEFAULT_LENGTH,
+                context_text=text,
+                context_offset=0,
+                sentence=text,
+            ),
+            text,
+        )
+        m_later = Match(
+            _make_attrib(
+                offset=_DEFAULT_OFFSET,
+                length=_DEFAULT_LENGTH,
+                context_text=text,
+                context_offset=_DEFAULT_OFFSET,
+                sentence=text,
+            ),
+            text,
+        )
+        assert m_early < m_later
+
+    def test_lt_not_implemented_for_non_match(self) -> None:
+        """Less-than comparison with a non-Match returns NotImplemented."""
+        m = _make_match()
+        assert m.__lt__("not a match") is NotImplemented
+
+
+class TestMatchIter:
+    """Tests for Match.__iter__() field-value iterator."""
+
+    def test_iter_yields_all_values(self) -> None:
+        """Iterating a match yields exactly _NUM_MATCH_FIELDS values."""
+        m = _make_match()
+        values = list(m)
+        assert len(values) == _NUM_MATCH_FIELDS
+
+    def test_iter_first_is_rule_id(self) -> None:
+        """The first value yielded by the iterator is the rule_id."""
+        m = _make_match()
+        assert next(iter(m)) == "MORFOLOGIK_RULE_EN_US"
+
+
+class TestMatchSetAttr:
+    """Tests for Match.__setattr__() type-coercing setter."""
+
+    def test_setattr_known_key_coerces_type(self) -> None:
+        """Setting a known field with a string coerces it to the declared type."""
+        m = _make_match()
+        new_offset = 5
+        m.offset = "5"  # type: ignore[assignment]
+        assert m.offset == new_offset
+        assert isinstance(m.offset, int)
+
+    def test_setattr_unknown_key_is_ignored(self) -> None:
+        """Setting an unknown field is silently ignored."""
+        m = _make_match()
+        m.__setattr__("nonexistent_key", "value")
+        assert not hasattr(m, "nonexistent_key")
+
+
+class TestMatchGetAttr:
+    """Tests for Match.__getattr__() unknown-attribute guard."""
+
+    def test_getattr_unknown_key_raises(self) -> None:
+        """Accessing an unknown attribute raises AttributeError."""
+        m = _make_match()
+        with pytest.raises(AttributeError, match="no attribute"):
+            _ = m.completely_unknown
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
new file mode 100644
index 0000000..aa44343
--- /dev/null
+++ b/tests/unit/test_utils.py
@@ -0,0 +1,218 @@
+"""Unit tests for language_tool_python.utils (classify_matches, correct)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from language_tool_python.match import Match
+from language_tool_python.utils import TextStatus, classify_matches, correct
+
+if TYPE_CHECKING:
+    from language_tool_python._internals.api_types import CheckMatch
+
+
+def _make_match(
+    rule_id: str = "RULE",
+    offset: int = 0,
+    length: int = 4,
+    replacements: list[str] | None = None,
+) -> Match:
+    attrib: CheckMatch = {
+        "message": "Error",
+        "shortMessage": "",
+        "replacements": [{"value": r} for r in (replacements or [])],
+        "offset": offset,
+        "length": length,
+        "context": {"text": "text here.", "offset": offset, "length": length},
+        "sentence": "text here.",
+        "type": {"typeName": "Other"},
+        "rule": {
+            "id": rule_id,
+            "description": "desc",
+            "issueType": "misspelling",
+            "category": {"id": "TYPOS", "name": "Typos"},
+        },
+        "ignoreForIncompleteSentence": False,
+        "contextForSureMatch": 0,
+    }
+    return Match(attrib, "text here.")
+
+
+class TestClassifyMatches:
+    """Tests for classify_matches() match-set status classifier."""
+
+    def test_no_matches_returns_correct(self) -> None:
+        """An empty match list is classified as CORRECT."""
+        assert classify_matches([]) == TextStatus.CORRECT
+
+    def test_matches_with_replacements_returns_faulty(self) -> None:
+        """A match that has a replacement is classified as FAULTY."""
+        m = _make_match(replacements=["fix"])
+        assert classify_matches([m]) == TextStatus.FAULTY
+
+    def test_matches_without_replacements_returns_garbage(self) -> None:
+        """A match without any replacement is classified as GARBAGE."""
+        m = _make_match(replacements=[])
+        assert classify_matches([m]) == TextStatus.GARBAGE
+
+    def test_mixed_filters_to_faulty(self) -> None:
+        """A mix of matches with and without replacements is classified as FAULTY."""
+        m_with = _make_match(replacements=["fix"])
+        m_without = _make_match(replacements=[])
+        assert classify_matches([m_with, m_without]) == TextStatus.FAULTY
+
+    def test_all_without_replacements_is_garbage(self) -> None:
+        """Multiple matches all lacking replacements are classified as GARBAGE."""
+        matches = [_make_match(replacements=[]) for _ in range(3)]
+        assert classify_matches(matches) == TextStatus.GARBAGE
+
+
+class TestCorrect:
+    """Tests for correct() auto-correction function."""
+
+    def test_no_matches_returns_unchanged(self) -> None:
+        """Text with no matches is returned unchanged."""
+        assert correct("hello world", []) == "hello world"
+
+    def test_single_correction(self) -> None:
+        """A single match with a replacement is applied to the text."""
+        m = _make_match(offset=0, length=4, replacements=["Text"])
+        result = correct("text here.", [m])
+        assert result == "Text here."
+
+    def test_correction_replaces_error(self) -> None:
+        """A misspelled word is replaced by the first suggested correction."""
+        text = "Helo world"
+        attrib: CheckMatch = {
+            "message": "Misspelling",
+            "shortMessage": "",
+            "replacements": [{"value": "Hello"}],
+            "offset": 0,
+            "length": 4,
+            "context": {"text": text, "offset": 0, "length": 4},
+            "sentence": text,
+            "type": {"typeName": "Other"},
+            "rule": {
+                "id": "SPELL",
+                "description": "Spelling",
+                "issueType": "misspelling",
+                "category": {"id": "TYPOS", "name": "Typos"},
+            },
+            "ignoreForIncompleteSentence": False,
+            "contextForSureMatch": 0,
+        }
+        m = Match(attrib, text)
+        result = correct(text, [m])
+        assert result == "Hello world"
+
+    def test_match_without_replacement_is_skipped(self) -> None:
+        """A match with no replacement leaves the text unchanged."""
+        m = _make_match(offset=0, length=4, replacements=[])
+        assert correct("text here.", [m]) == "text here."
+
+    def test_overlapping_match_skips_mismatched_error(self) -> None:
+        """The second of two overlapping matches is skipped when offset drifts."""
+        # First match replaces "aa" (offset 0, len 2) with "xxxxxx" (longer).
+        # Second match overlaps at offset 1, len 2 ("ab"). After the first
+        # replacement expands the text, the second match's expected text no
+        # longer sits at the right position → continue branch is hit.
+        text = "aabbc"
+        attrib1: CheckMatch = {
+            "message": "e",
+            "shortMessage": "",
+            "replacements": [{"value": "xxxxxx"}],
+            "offset": 0,
+            "length": 2,
+            "context": {"text": text, "offset": 0, "length": 2},
+            "sentence": text,
+            "type": {"typeName": "Other"},
+            "rule": {
+                "id": "R",
+                "description": "d",
+                "issueType": "misspelling",
+                "category": {"id": "C", "name": "C"},
+            },
+            "ignoreForIncompleteSentence": False,
+            "contextForSureMatch": 0,
+        }
+        attrib2: CheckMatch = {
+            "message": "e",
+            "shortMessage": "",
+            "replacements": [{"value": "y"}],
+            "offset": 1,
+            "length": 2,
+            "context": {"text": text, "offset": 1, "length": 2},
+            "sentence": text,
+            "type": {"typeName": "Other"},
+            "rule": {
+                "id": "R",
+                "description": "d",
+                "issueType": "misspelling",
+                "category": {"id": "C", "name": "C"},
+            },
+            "ignoreForIncompleteSentence": False,
+            "contextForSureMatch": 0,
+        }
+        m1 = Match(attrib1, text)
+        m2 = Match(attrib2, text)
+        result = correct(text, [m1, m2])
+        assert result == "xxxxxxbbc"
+
+    def test_correct_adjusts_offset_for_length_change(self) -> None:
+        """A length-changing replacement shifts the offset for subsequent matches."""
+        text = "A b c"
+        attrib1: CheckMatch = {
+            "message": "err",
+            "shortMessage": "",
+            "replacements": [{"value": "AAA"}],
+            "offset": 0,
+            "length": 1,
+            "context": {"text": text, "offset": 0, "length": 1},
+            "sentence": text,
+            "type": {"typeName": "Other"},
+            "rule": {
+                "id": "R",
+                "description": "d",
+                "issueType": "misspelling",
+                "category": {"id": "C", "name": "C"},
+            },
+            "ignoreForIncompleteSentence": False,
+            "contextForSureMatch": 0,
+        }
+        attrib2: CheckMatch = {
+            "message": "err",
+            "shortMessage": "",
+            "replacements": [{"value": "BBB"}],
+            "offset": 2,
+            "length": 1,
+            "context": {"text": text, "offset": 2, "length": 1},
+            "sentence": text,
+            "type": {"typeName": "Other"},
+            "rule": {
+                "id": "R",
+                "description": "d",
+                "issueType": "misspelling",
+                "category": {"id": "C", "name": "C"},
+            },
+            "ignoreForIncompleteSentence": False,
+            "contextForSureMatch": 0,
+        }
+        m1 = Match(attrib1, text)
+        m2 = Match(attrib2, text)
+        result = correct(text, [m1, m2])
+        assert result == "AAA BBB c"
+
+
+@pytest.mark.parametrize(
+    ("status", "value"),
+    [
+        (TextStatus.CORRECT, "correct"),
+        (TextStatus.FAULTY, "faulty"),
+        (TextStatus.GARBAGE, "garbage"),
+    ],
+)
+def test_text_status_values(status: TextStatus, value: str) -> None:
+    """TextStatus enum values match expected strings."""
+    assert status.value == value