Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Supported compression formats are:
- gzip (``.gz``)
- bzip2 (``.bz2``)
- xz (``.xz``)
- Zstandard (``.zst``) (optional)
- Zstandard (``.zst``)


Example usage
Expand Down Expand Up @@ -164,29 +164,35 @@ bzip2 and xz compression methods do not store timestamps in the file headers,
so output from them is also reproducible.


Optional Zstandard support
--------------------------
Zstandard support
-----------------

For reading and writing Zstandard (``.zst``) files, either the ``zstd`` command-line
program or the Python ``zstandard`` package needs to be installed.
For reading and writing Zstandard (``.zst``) files,
either the ``zstd`` command-line program,
the ``backports.zstd`` package or (on Python 3.14+)
the ``compression.zstd`` module in the standard library is used.

* If the ``threads`` parameter to ``xopen()`` is ``None`` (the default) or any value greater than 0,
``xopen`` uses an external ``zstd`` process.
* If the above fails (because no ``zstd`` program is available) or if ``threads`` is 0,
the Python Zstandard module (``compression.zstd`` on Python 3.14+, ``backports.zstd`` otherwise) is used.

To ensure that you get the correct ``zstandard`` version, you can specify the ``zstd`` extra for
``xopen``, that is, install it using ``pip install xopen[zstd]``.


Changelog
---------

development version
~~~~~~~~~~~~~~~~~~~

* Zstandard is now supported by using ``compression.zstd``, which is part of the
Python standard library since Python 3.14.
On Python versions before 3.14, ``backports.zstd`` is used instead.
* Zstandard support is no longer optional.
That is, it is no longer necessary to install ``xopen`` with the ``zstd`` extra.
The reason Zstandard was optional was that ``python-zstandard`` wheels are quite large,
but ``backports.zstd`` wheels are much smaller.
* Dropped support for Python 3.8 and 3.9
* Started supporting Python 3.13
* Started supporting Python 3.13 and 3.14 (including free-threaded)

v2.0.2 (2024-06-12)
~~~~~~~~~~~~~~~~~~~
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@ requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"',
'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"'
'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"',
'backports.zstd; python_version < "3.14"',
]

[project.urls]
homepage = "https://github.com/pycompression/xopen/"

[project.optional-dependencies]
dev = ["pytest"]
zstd = ["zstandard<1"]
zstd = [] # Leave this in here for backwards compatibility (Zstandard support used to be optional)

[tool.setuptools_scm]
write_to = "src/xopen/_version.py"
Expand Down
31 changes: 19 additions & 12 deletions src/xopen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@
gzip_ng_threaded = None
zlib_ng = None

try:
import zstandard # type: ignore
except ImportError:
zstandard = None # type: ignore
if sys.version_info >= (3, 14):
from compression import zstd
else:
from backports import zstd

try:
import fcntl
Expand Down Expand Up @@ -509,8 +509,8 @@ def _open_zst(
assert compresslevel != 0
if compresslevel is None:
compresslevel = XOPEN_DEFAULT_ZST_COMPRESSION
if zstandard:
max_window_bits = zstandard.WINDOWLOG_MAX
if zstd:
max_window_bits = zstd.DecompressionParameter.window_log_max.bounds()[1]
else:
max_window_bits = 31
if threads != 0:
Expand All @@ -531,15 +531,22 @@ def _open_zst(
_ProgramSettings(program_args, tuple(range(1, 20)), "-T"),
)
except OSError:
if zstandard is None:
if zstd is None:
# No fallback available
raise

if zstandard is None:
raise ImportError("zstandard module (python-zstandard) not available")
dctx = zstandard.ZstdDecompressor(max_window_size=2**max_window_bits)
cctx = zstandard.ZstdCompressor(level=compresslevel)
f = zstandard.open(filename, mode, cctx=cctx, dctx=dctx) # type: ignore
if zstd is None:
raise ImportError("zstd module not available")

if "r" in mode:
level = None
options = {
zstd.DecompressionParameter.window_log_max: max_window_bits,
}
else:
level = compresslevel
options = None
f = zstd.open(filename, mode, options=options, level=level) # type: ignore
if mode == "rb":
return io.BufferedReader(f)
return io.BufferedWriter(f) # mode "ab" and "wb"
Expand Down
46 changes: 23 additions & 23 deletions tests/test_xopen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@

from xopen import xopen, _detect_format_from_content

try:
import zstandard
except ImportError:
zstandard = None
if sys.version_info >= (3, 14):
from compression import zstd
else:
from backports import zstd


# TODO this is duplicated in test_piped.py
TEST_DIR = Path(__file__).parent
CONTENT_LINES = ["Testing, testing ...\n", "The second line.\n"]
CONTENT = "".join(CONTENT_LINES)
extensions = ["", ".gz", ".bz2", ".xz"]
if shutil.which("zstd") or zstandard:
if shutil.which("zstd") or zstd:
extensions += [".zst"]
base = os.path.join(os.path.dirname(__file__), "file.txt")
files = [base + ext for ext in extensions]
Expand Down Expand Up @@ -107,7 +107,7 @@ def test_binary(fname):
@pytest.mark.parametrize("mode", ["b", "", "t"])
@pytest.mark.parametrize("threads", [None, 0])
def test_roundtrip(ext, tmp_path, threads, mode):
if ext == ".zst" and threads == 0 and zstandard is None:
if ext == ".zst" and threads == 0 and zstd is None:
return
path = tmp_path / f"file{ext}"
data = b"Hello" if mode == "b" else "Hello"
Expand All @@ -118,7 +118,7 @@ def test_roundtrip(ext, tmp_path, threads, mode):


def test_binary_no_isal_no_threads(fname, xopen_without_igzip):
if fname.endswith(".zst") and zstandard is None:
if fname.endswith(".zst") and zstd is None:
return
with xopen_without_igzip(fname, "rb", threads=0) as f:
lines = list(f)
Expand Down Expand Up @@ -268,8 +268,8 @@ def test_invalid_compression_level(tmp_path):
@pytest.mark.parametrize("ext", extensions)
@pytest.mark.parametrize("threads", (0, 1))
def test_append(ext, threads, tmp_path):
if ext == ".zst" and zstandard is None and threads == 0:
pytest.skip("No zstandard installed")
if ext == ".zst" and zstd is None and threads == 0:
pytest.skip("No zstd installed")
text = b"AB"
reference = text + text
path = tmp_path / f"the-file{ext}"
Expand Down Expand Up @@ -367,7 +367,7 @@ def test_read_no_threads(ext):
".zst": io.BufferedReader,
"": io.BufferedReader,
}
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
klass = klasses[ext]
with xopen(TEST_DIR / f"file.txt{ext}", "rb", threads=0) as f:
Expand Down Expand Up @@ -398,7 +398,7 @@ def test_write_no_threads(tmp_path, ext):
"": io.BufferedWriter,
}
if ext == ".zst":
# Skip zst because if python-zstandard is not installed,
# Skip zst because if zstd is not available,
# we fall back to an external process even when threads=0
return
klass = klasses[ext]
Expand Down Expand Up @@ -540,7 +540,7 @@ def test_override_output_format_wrong_format(tmp_path):
@pytest.mark.parametrize("opener", OPENERS)
@pytest.mark.parametrize("extension", extensions)
def test_text_encoding_newline_passthrough(opener, extension, tmp_path):
if extension == ".zst" and zstandard is None:
if extension == ".zst" and zstd is None:
return
# "Eén ree\nTwee reeën\n" latin-1 encoded with \r as line separator.
encoded_text = b"E\xe9n ree\rTwee ree\xebn\r"
Expand All @@ -555,7 +555,7 @@ def test_text_encoding_newline_passthrough(opener, extension, tmp_path):
@pytest.mark.parametrize("opener", OPENERS)
@pytest.mark.parametrize("extension", extensions)
def test_text_encoding_errors(opener, extension, tmp_path):
if extension == ".zst" and zstandard is None:
if extension == ".zst" and zstd is None:
return
# "Eén ree\nTwee reeën\n" latin-1 encoded. This is not valid ascii.
encoded_text = b"E\xe9n ree\nTwee ree\xebn\n"
Expand Down Expand Up @@ -583,18 +583,18 @@ def test_read_devnull():
pass


def test_xopen_zst_fails_when_zstandard_not_available(monkeypatch):
def test_xopen_zst_fails_when_zstd_not_available(monkeypatch):
import xopen

monkeypatch.setattr(xopen, "zstandard", None)
monkeypatch.setattr(xopen, "zstd", None)
with pytest.raises(ImportError):
with xopen.xopen(TEST_DIR / "file.txt.zst", mode="rb", threads=0) as f:
f.read()


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_zst_long_window_size(threads):
if threads == 0 and zstandard is None:
if threads == 0 and zstd is None:
return
elif threads == 1 and not shutil.which("zstd"):
return
Expand All @@ -611,7 +611,7 @@ def test_xopen_zst_long_window_size(threads):
@pytest.mark.parametrize("threads", (0, 1))
@pytest.mark.parametrize("ext", extensions)
def test_pass_file_object_for_reading(ext, threads):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return

with open(TEST_DIR / f"file.txt{ext}", "rb") as fh:
Expand All @@ -622,7 +622,7 @@ def test_pass_file_object_for_reading(ext, threads):
@pytest.mark.parametrize("threads", (0, 1))
@pytest.mark.parametrize("ext", extensions)
def test_pass_file_object_for_writing(tmp_path, ext, threads):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
first_line = CONTENT_LINES[0].encode("utf-8")
with open(tmp_path / "out{ext}", "wb") as fh:
Expand All @@ -639,7 +639,7 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads):
format = ext[1:]
if ext == "":
format = None
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
first_line = CONTENT_LINES[0].encode("utf-8")
writer = xopen(filelike, "wb", format=format, threads=threads)
Expand All @@ -654,7 +654,7 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads):

@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_stdin(monkeypatch, ext, threads):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
# Add encoding to suppress encoding warnings
with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file:
Expand All @@ -677,7 +677,7 @@ def test_xopen_stdout(monkeypatch):

@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_read_from_pipe(ext, threads):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
in_file = TEST_DIR / f"file.txt{ext}"
process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE)
Expand All @@ -690,7 +690,7 @@ def test_xopen_read_from_pipe(ext, threads):

@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_write_to_pipe(threads, ext):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
format = ext.lstrip(".")
if format == "":
Expand All @@ -711,7 +711,7 @@ def test_xopen_write_to_pipe(threads, ext):
)
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_dev_stdin_read(threads, ext):
if ext == ".zst" and zstandard is None:
if ext == ".zst" and zstd is None:
return
file = str(Path(__file__).parent / f"file.txt{ext}")
result = subprocess.run(
Expand Down
Loading