diff --git a/README.rst b/README.rst index da5ff17..3ac9832 100644 --- a/README.rst +++ b/README.rst @@ -26,7 +26,7 @@ Supported compression formats are: - gzip (``.gz``) - bzip2 (``.bz2``) - xz (``.xz``) -- Zstandard (``.zst``) (optional) +- Zstandard (``.zst``) Example usage @@ -164,20 +164,19 @@ bzip2 and xz compression methods do not store timestamps in the file headers, so output from them is also reproducible. -Optional Zstandard support --------------------------- +Zstandard support +----------------- -For reading and writing Zstandard (``.zst``) files, either the ``zstd`` command-line -program or the Python ``zstandard`` package needs to be installed. +For reading and writing Zstandard (``.zst``) files, +either the ``zstd`` command-line program, +the ``backports.zstd`` package, or (on Python 3.14+) +the ``compression.zstd`` module in the standard library is used. * If the ``threads`` parameter to ``xopen()`` is ``None`` (the default) or any value greater than 0, ``xopen`` uses an external ``zstd`` process. * If the above fails (because no ``zstd`` program is available) or if ``threads`` is 0, the ``zstandard`` package is used. -To ensure that you get the correct ``zstandard`` version, you can specify the ``zstd`` extra for -``xopen``, that is, install it using ``pip install xopen[zstd]``. - Changelog --------- @@ -185,8 +184,15 @@ Changelog development version ~~~~~~~~~~~~~~~~~~~ +* Zstandard is now supported by using ``compression.zstd``, which has been part of the + Python standard library since Python 3.14. + On Python versions before 3.14, ``backports.zstd`` is used instead. +* Zstandard support is no longer optional. + That is, it is no longer necessary to install ``xopen`` with the ``zstd`` extra. + The reason Zstandard was optional was that ``python-zstandard`` wheels are quite large, + but ``backports.zstd`` wheels are much smaller. 
* Dropped support for Python 3.8 and 3.9 -* Started supporting Python 3.13 +* Started supporting Python 3.13 and 3.14 (including free-threaded) v2.0.2 (2024-06-12) ~~~~~~~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index afbf201..ec0f923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ 'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', - 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"' + 'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', + 'backports.zstd; python_version < "3.14"', ] [project.urls] @@ -27,7 +28,7 @@ homepage = "https://github.com/pycompression/xopen/" [project.optional-dependencies] dev = ["pytest"] -zstd = ["zstandard<1"] +zstd = [] # Leave this in here for backwards compatibility (Zstandard support used to be optional) [tool.setuptools_scm] write_to = "src/xopen/_version.py" diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 1348ea9..756d10c 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -65,10 +65,10 @@ gzip_ng_threaded = None zlib_ng = None -try: - import zstandard # type: ignore -except ImportError: - zstandard = None # type: ignore +if sys.version_info >= (3, 14): + from compression import zstd +else: + from backports import zstd try: import fcntl @@ -509,8 +509,8 @@ def _open_zst( assert compresslevel != 0 if compresslevel is None: compresslevel = XOPEN_DEFAULT_ZST_COMPRESSION - if zstandard: - max_window_bits = zstandard.WINDOWLOG_MAX + if zstd: + max_window_bits = zstd.DecompressionParameter.window_log_max.bounds()[1] else: max_window_bits = 31 if threads != 0: @@ -531,15 +531,22 @@ def _open_zst( _ProgramSettings(program_args, tuple(range(1, 20)), "-T"), ) except OSError: - if zstandard is None: + if zstd is None: # No fallback 
available raise - if zstandard is None: - raise ImportError("zstandard module (python-zstandard) not available") - dctx = zstandard.ZstdDecompressor(max_window_size=2**max_window_bits) - cctx = zstandard.ZstdCompressor(level=compresslevel) - f = zstandard.open(filename, mode, cctx=cctx, dctx=dctx) # type: ignore + if zstd is None: + raise ImportError("zstd module not available") + + if "r" in mode: + level = None + options = { + zstd.DecompressionParameter.window_log_max: max_window_bits, + } + else: + level = compresslevel + options = None + f = zstd.open(filename, mode, options=options, level=level) # type: ignore if mode == "rb": return io.BufferedReader(f) return io.BufferedWriter(f) # mode "ab" and "wb" diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 9e8f816..8f96db7 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -18,10 +18,10 @@ from xopen import xopen, _detect_format_from_content -try: - import zstandard -except ImportError: - zstandard = None +if sys.version_info >= (3, 14): + from compression import zstd +else: + from backports import zstd # TODO this is duplicated in test_piped.py @@ -29,7 +29,7 @@ CONTENT_LINES = ["Testing, testing ...\n", "The second line.\n"] CONTENT = "".join(CONTENT_LINES) extensions = ["", ".gz", ".bz2", ".xz"] -if shutil.which("zstd") or zstandard: +if shutil.which("zstd") or zstd: extensions += [".zst"] base = os.path.join(os.path.dirname(__file__), "file.txt") files = [base + ext for ext in extensions] @@ -107,7 +107,7 @@ def test_binary(fname): @pytest.mark.parametrize("mode", ["b", "", "t"]) @pytest.mark.parametrize("threads", [None, 0]) def test_roundtrip(ext, tmp_path, threads, mode): - if ext == ".zst" and threads == 0 and zstandard is None: + if ext == ".zst" and threads == 0 and zstd is None: return path = tmp_path / f"file{ext}" data = b"Hello" if mode == "b" else "Hello" @@ -118,7 +118,7 @@ def test_roundtrip(ext, tmp_path, threads, mode): def test_binary_no_isal_no_threads(fname, 
xopen_without_igzip): - if fname.endswith(".zst") and zstandard is None: + if fname.endswith(".zst") and zstd is None: return with xopen_without_igzip(fname, "rb", threads=0) as f: lines = list(f) @@ -268,8 +268,8 @@ def test_invalid_compression_level(tmp_path): @pytest.mark.parametrize("ext", extensions) @pytest.mark.parametrize("threads", (0, 1)) def test_append(ext, threads, tmp_path): - if ext == ".zst" and zstandard is None and threads == 0: - pytest.skip("No zstandard installed") + if ext == ".zst" and zstd is None and threads == 0: + pytest.skip("No zstd installed") text = b"AB" reference = text + text path = tmp_path / f"the-file{ext}" @@ -367,7 +367,7 @@ def test_read_no_threads(ext): ".zst": io.BufferedReader, "": io.BufferedReader, } - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return klass = klasses[ext] with xopen(TEST_DIR / f"file.txt{ext}", "rb", threads=0) as f: @@ -398,7 +398,7 @@ def test_write_no_threads(tmp_path, ext): "": io.BufferedWriter, } if ext == ".zst": - # Skip zst because if python-zstandard is not installed, + # Skip zst because if zstd is not available, # we fall back to an external process even when threads=0 return klass = klasses[ext] @@ -540,7 +540,7 @@ def test_override_output_format_wrong_format(tmp_path): @pytest.mark.parametrize("opener", OPENERS) @pytest.mark.parametrize("extension", extensions) def test_text_encoding_newline_passthrough(opener, extension, tmp_path): - if extension == ".zst" and zstandard is None: + if extension == ".zst" and zstd is None: return # "Eén ree\nTwee reeën\n" latin-1 encoded with \r for as line separator. 
encoded_text = b"E\xe9n ree\rTwee ree\xebn\r" @@ -555,7 +555,7 @@ def test_text_encoding_newline_passthrough(opener, extension, tmp_path): @pytest.mark.parametrize("opener", OPENERS) @pytest.mark.parametrize("extension", extensions) def test_text_encoding_errors(opener, extension, tmp_path): - if extension == ".zst" and zstandard is None: + if extension == ".zst" and zstd is None: return # "Eén ree\nTwee reeën\n" latin-1 encoded. This is not valid ascii. encoded_text = b"E\xe9n ree\nTwee ree\xebn\n" @@ -583,10 +583,10 @@ def test_read_devnull(): pass -def test_xopen_zst_fails_when_zstandard_not_available(monkeypatch): +def test_xopen_zst_fails_when_zstd_not_available(monkeypatch): import xopen - monkeypatch.setattr(xopen, "zstandard", None) + monkeypatch.setattr(xopen, "zstd", None) with pytest.raises(ImportError): with xopen.xopen(TEST_DIR / "file.txt.zst", mode="rb", threads=0) as f: f.read() @@ -594,7 +594,7 @@ def test_xopen_zst_fails_when_zstandard_not_available(monkeypatch): @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_zst_long_window_size(threads): - if threads == 0 and zstandard is None: + if threads == 0 and zstd is None: return elif threads == 1 and not shutil.which("zstd"): return @@ -611,7 +611,7 @@ def test_xopen_zst_long_window_size(threads): @pytest.mark.parametrize("threads", (0, 1)) @pytest.mark.parametrize("ext", extensions) def test_pass_file_object_for_reading(ext, threads): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return with open(TEST_DIR / f"file.txt{ext}", "rb") as fh: @@ -622,7 +622,7 @@ def test_pass_file_object_for_reading(ext, threads): @pytest.mark.parametrize("threads", (0, 1)) @pytest.mark.parametrize("ext", extensions) def test_pass_file_object_for_writing(tmp_path, ext, threads): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return first_line = CONTENT_LINES[0].encode("utf-8") with open(tmp_path / "out{ext}", "wb") as fh: @@ -639,7 +639,7 @@ def 
test_pass_bytesio_for_reading_and_writing(ext, threads): format = ext[1:] if ext == "": format = None - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return first_line = CONTENT_LINES[0].encode("utf-8") writer = xopen(filelike, "wb", format=format, threads=threads) @@ -654,7 +654,7 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_stdin(monkeypatch, ext, threads): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return # Add encoding to suppress encoding warnings with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file: @@ -677,7 +677,7 @@ def test_xopen_stdout(monkeypatch): @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_read_from_pipe(ext, threads): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return in_file = TEST_DIR / f"file.txt{ext}" process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) @@ -690,7 +690,7 @@ def test_xopen_read_from_pipe(ext, threads): @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_write_to_pipe(threads, ext): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return format = ext.lstrip(".") if format == "": @@ -711,7 +711,7 @@ def test_xopen_write_to_pipe(threads, ext): ) @pytest.mark.parametrize("threads", (0, 1)) def test_xopen_dev_stdin_read(threads, ext): - if ext == ".zst" and zstandard is None: + if ext == ".zst" and zstd is None: return file = str(Path(__file__).parent / f"file.txt{ext}") result = subprocess.run(