From 71a1c06233edf27cf2eeec3c6595fb2703bca7bb Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Tue, 23 Jun 2026 16:12:34 -0700
Subject: [PATCH 01/14] python(fix): bound the channel data cache to avoid OOM
 on long pulls

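Make the channel data cache per-instance and LRU byte-bounded, and add a
`data_cache_max_bytes` kwarg on `SiftClient` to configure the cap (default
512 MiB; 0 disables caching). `ignore_cache=True` now also skips cache writes.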
---
 python/CHANGELOG.md                           |  15 +
 .../_internal/low_level_wrappers/data.py      | 163 +++++++++--
 .../low_level_wrappers/test_data_cache.py     | 269 ++++++++++++++++++
 python/lib/sift_client/client.py              |  15 +
 python/lib/sift_client/resources/channels.py  |  15 +-
 5 files changed, 452 insertions(+), 25 deletions(-)
 create mode 100644 python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index 407e657a7..510efdafb 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -7,6 +7,21 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 ### What's New
 
+#### Bounded channel data cache
+
+`SiftClient.get_data` cache state is now per-instance and byte-bounded instead of shared across the process and unbounded. A new `data_cache_max_bytes` constructor kwarg (default 512 MiB) caps the in-memory channel-data footprint; the least-recently-used cached channel is evicted once the bound is reached. Set `data_cache_max_bytes=0` to disable caching entirely.
+
+`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which silently OOM'd long-running pods doing sustained data pulls.
+
+```python
+client = SiftClient(
+    connection_config=config,
+    data_cache_max_bytes=128 * 1024 * 1024,  # 128 MiB cap
+)
+```
+
+The internal `DataLowLevelClient.channel_cache` is no longer a class attribute. Any external code that relied on `DataLowLevelClient.channel_cache.channels.clear()` as a workaround should remove it — the bounded cache no longer requires manual purging.
+
 #### Resource and principal attributes (ABAC)
 
 Added a public API for attribute based access control (ABAC) attributes. `client.resource_attributes` manages attribute keys assigned to entities (assets, channels, runs), and `client.principal_attributes` manages attribute keys assigned to principals (users and user groups). Both are available synchronously and asynchronously via `client.async_`.
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 57b24e398..bed1b3a44 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import logging
+from collections import OrderedDict
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, cast
 
@@ -34,17 +35,123 @@
 # has been resolved. In the mean time each channel gets its own request.
 REQUEST_BATCH_SIZE = 1
 
+# Default in-memory budget for cached channel DataFrames, per ``DataLowLevelClient``
+# instance. 512 MiB is well below typical pod limits while still letting common
+# interactive workloads stay in cache. Override via ``SiftClient(data_cache_max_bytes=...)``.
+DEFAULT_DATA_CACHE_MAX_BYTES = 512 * 1024 * 1024
+
 
 class ChannelCacheEntry(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     data: pd.DataFrame
     start_time: datetime
     end_time: datetime
+    # ``df.memory_usage(deep=True).sum()`` at construction time. Stored on the
+    # entry so eviction is O(1) per dropped item instead of re-walking frames.
+    size_bytes: int
+
+
+def _new_cache_entry(
+    data: pd.DataFrame, start_time: datetime, end_time: datetime
+) -> ChannelCacheEntry:
+    return ChannelCacheEntry(
+        data=data,
+        start_time=start_time,
+        end_time=end_time,
+        size_bytes=int(data.memory_usage(deep=True).sum()),
+    )
+
+
+class ChannelCache:
+    """LRU-ordered, byte-bounded cache of per-channel DataFrames.
+
+    Each ``DataLowLevelClient`` owns its own ``ChannelCache``; the previous
+    implementation kept this on the class, which silently shared state across
+    every ``SiftClient`` in the process and grew without bound. Sustained pulls
+    against that shared cache OOM'd long-running pods.
+
+    Bookkeeping invariant: ``_total_bytes == sum(e.size_bytes for e in _entries.values())``.
+    Maintained by every mutation path so the bound is checked in O(1) without
+    re-walking entries.
+
+    ``max_bytes <= 0`` disables retention: every ``get`` misses, ``put`` returns
+    without storing. ``name_id_map`` is intentionally outside the bound — it's
+    a tiny string→string map and forms part of the contract with ``_update_cache``,
+    which depends on it to translate channel names to ids.
+    """
+
+    def __init__(self, max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES):
+        if max_bytes < 0:
+            raise ValueError(
+                f"data_cache_max_bytes must be >= 0, got {max_bytes}"
+            )
+        self.name_id_map: dict[str, str] = {}
+        self._entries: OrderedDict[str, ChannelCacheEntry] = OrderedDict()
+        self._total_bytes: int = 0
+        self._max_bytes: int = max_bytes
+
+    @property
+    def enabled(self) -> bool:
+        return self._max_bytes > 0
+
+    @property
+    def max_bytes(self) -> int:
+        return self._max_bytes
+
+    @property
+    def total_bytes(self) -> int:
+        return self._total_bytes
+
+    def __len__(self) -> int:
+        return len(self._entries)
+
+    def __contains__(self, channel_id: str) -> bool:
+        return channel_id in self._entries
+
+    def get(self, channel_id: str) -> ChannelCacheEntry | None:
+        """Return the entry for ``channel_id`` if cached, otherwise None.
 
+        Promotes the entry to most-recently-used on hit.
+        """
+        entry = self._entries.get(channel_id)
+        if entry is not None:
+            self._entries.move_to_end(channel_id)
+        return entry
+
+    def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
+        """Insert or replace ``channel_id``, then evict LRU until under the bound.
 
-class ChannelCache(BaseModel):
-    name_id_map: dict[str, str]
-    channels: dict[str, ChannelCacheEntry]
+        Reclaims any prior entry's byte count BEFORE adding the new one's, so a
+        re-insert (e.g. concat-merge of fresh data into an existing entry)
+        accounts for the size delta correctly rather than double-counting.
+        """
+        if not self.enabled:
+            return
+        prior = self._entries.pop(channel_id, None)
+        if prior is not None:
+            self._total_bytes -= prior.size_bytes
+        self._entries[channel_id] = entry
+        self._total_bytes += entry.size_bytes
+        self._evict_until_under_bound()
+
+    def invalidate(self, channel_id: str) -> None:
+        prior = self._entries.pop(channel_id, None)
+        if prior is not None:
+            self._total_bytes -= prior.size_bytes
+
+    def clear(self) -> None:
+        self._entries.clear()
+        self._total_bytes = 0
+
+    def _evict_until_under_bound(self) -> None:
+        # ``popitem(last=False)`` drops the oldest entry. A single fresh entry
+        # whose ``size_bytes`` alone exceeds ``max_bytes`` ends up evicted on
+        # the final iteration — the deliberate choice over "keep the oversized
+        # entry and violate the bound" or "evict everyone else and still
+        # violate the bound."
+        while self._entries and self._total_bytes > self._max_bytes:
+            _, dropped = self._entries.popitem(last=False)
+            self._total_bytes -= dropped.size_bytes
 
 
 class DataLowLevelClient(LowLevelClientBase, WithGrpcClient):
@@ -53,15 +160,21 @@ class DataLowLevelClient(LowLevelClientBase, WithGrpcClient):
     This class provides a thin wrapper around the autogenerated bindings for the DataAPI.
     """
 
-    channel_cache: ChannelCache = ChannelCache(name_id_map={}, channels={})
-
-    def __init__(self, grpc_client: GrpcClient):
+    def __init__(
+        self,
+        grpc_client: GrpcClient,
+        *,
+        data_cache_max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES,
+    ):
         """Initialize the DataLowLevelClient.
 
         Args:
             grpc_client: The gRPC client to use for making API calls.
+            data_cache_max_bytes: Cap on the in-memory channel-data cache (bytes).
+                Set to ``0`` to disable caching. See ``ChannelCache``.
         """
         super().__init__(grpc_client)
+        self.channel_cache = ChannelCache(max_bytes=data_cache_max_bytes)
 
     def _update_name_id_map(self, channels: list[Channel]):
         """Update the name id map with the new channels."""
@@ -109,7 +222,9 @@ def _filter_cached_channels(self, channel_ids: list[str]) -> tuple[list[str], li
         cached_channels = []
         not_cached_channels = []
         for channel_id in channel_ids:
-            if self.channel_cache.channels.get(channel_id):
+            # ``__contains__`` is a non-promoting peek; ``_check_cache`` does
+            # the LRU-touching ``get`` shortly after for the actual lookup.
+            if channel_id in self.channel_cache:
                 cached_channels.append(channel_id)
             else:
                 not_cached_channels.append(channel_id)
@@ -139,7 +254,7 @@ def _check_cache(
             A tuple of (data, start_time, end_time)
             where data is a pandas dataframe and start and end times are what should be used for the next call based on what is not covered by the cached data.
         """
-        cached_data = self.channel_cache.channels.get(channel_id)
+        cached_data = self.channel_cache.get(channel_id)
         ret_start_time = start_time
         ret_end_time = end_time
         ret_data = None
@@ -204,24 +319,23 @@ def _update_cache(
                     # So we just don't update the cache.
                     continue
 
-            if channel_id in self.channel_cache.channels:
-                self.channel_cache.channels[channel_id].data = (
-                    pd.concat([self.channel_cache.channels[channel_id].data, data])
-                    .groupby(level=0)
-                    .last()
-                )
-                self.channel_cache.channels[channel_id].start_time = min(
-                    suggested_start_time, self.channel_cache.channels[channel_id].start_time
+            existing = self.channel_cache.get(channel_id)
+            if existing is not None:
+                merged_data = (
+                    pd.concat([existing.data, data]).groupby(level=0).last()
                 )
-                self.channel_cache.channels[channel_id].end_time = max(
-                    end_time, self.channel_cache.channels[channel_id].end_time
+                entry = _new_cache_entry(
+                    data=merged_data,
+                    start_time=min(suggested_start_time, existing.start_time),
+                    end_time=max(end_time, existing.end_time),
                 )
             else:
-                self.channel_cache.channels[channel_id] = ChannelCacheEntry(
+                entry = _new_cache_entry(
                     data=data,
                     start_time=suggested_start_time,
                     end_time=end_time,
                 )
+            self.channel_cache.put(channel_id, entry)
 
     async def get_channel_data(
         self,
@@ -308,9 +422,14 @@ async def get_channel_data(
                     else:
                         ret_data[name] = pd.concat([ret_data[name], df]).groupby(level=0).last()
 
-        self._update_cache(
-            channel_data=ret_data, start_time=start_time, end_time=end_time, run_id=run_id
-        )
+        # ``ignore_cache=True`` is documented as a read-side bypass, but the
+        # previous implementation still wrote to the shared cache on every
+        # call, which meant a "non-caching" workload still grew the cache
+        # without bound. Skip writes when the caller asked us to ignore it.
+        if not ignore_cache:
+            self._update_cache(
+                channel_data=ret_data, start_time=start_time, end_time=end_time, run_id=run_id
+            )
 
         return ret_data
 
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
new file mode 100644
index 000000000..97fa70ddb
--- /dev/null
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
@@ -0,0 +1,269 @@
+"""Tests for the channel data cache in :mod:`sift_client._internal.low_level_wrappers.data`.
+
+Two layers covered here:
+
+* :class:`ChannelCache` directly — byte accounting, LRU promotion, eviction,
+  edge cases. These tests construct cache entries from real (tiny) DataFrames
+  so the size measurement code is exercised end-to-end.
+* :class:`DataLowLevelClient` — ``ignore_cache=True`` skipping writes,
+  per-instance cache isolation, ``data_cache_max_bytes=0`` disabling cache.
+
+The OOM regression that motivated this code happened because the cache was a
+class attribute that grew without bound. The instance-isolation test below is
+the canary that catches anyone re-introducing that pattern.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta, timezone
+from unittest.mock import MagicMock
+
+import pandas as pd
+import pytest
+
+from sift_client._internal.low_level_wrappers.data import (
+    DEFAULT_DATA_CACHE_MAX_BYTES,
+    ChannelCache,
+    ChannelCacheEntry,
+    DataLowLevelClient,
+    _new_cache_entry,
+)
+
+
+def _entry(rows: int, *, value_dtype: str = "float64") -> ChannelCacheEntry:
+    """Build a ChannelCacheEntry with ``rows`` rows of fake data."""
+    index = pd.date_range("2025-01-01", periods=rows, freq="ms", tz=timezone.utc)
+    data = pd.DataFrame({"value": range(rows)}, index=index).astype({"value": value_dtype})
+    return _new_cache_entry(
+        data=data,
+        start_time=index[0].to_pydatetime(),
+        end_time=index[-1].to_pydatetime(),
+    )
+
+
+def _invariant_holds(cache: ChannelCache) -> bool:
+    return cache.total_bytes == sum(e.size_bytes for e in cache._entries.values())
+
+
+class TestChannelCacheBookkeeping:
+    """Tight checks on the internal byte counter and ordering."""
+
+    def test_put_get_roundtrip(self) -> None:
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        entry = _entry(rows=10)
+        cache.put("c1", entry)
+
+        assert cache.get("c1") is entry
+        assert cache.total_bytes == entry.size_bytes
+        assert _invariant_holds(cache)
+
+    def test_put_replaces_size_accounting(self) -> None:
+        """A second put for the same key must reclaim the prior size first."""
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        small = _entry(rows=10)
+        big = _entry(rows=1000)
+
+        cache.put("c1", small)
+        cache.put("c1", big)
+
+        # Total reflects only the second entry, never small + big.
+        assert cache.total_bytes == big.size_bytes
+        assert cache.get("c1") is big
+        assert _invariant_holds(cache)
+
+    def test_invalidate_drops_byte_count(self) -> None:
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        cache.put("c1", _entry(rows=10))
+        cache.invalidate("c1")
+
+        assert cache.get("c1") is None
+        assert cache.total_bytes == 0
+        assert _invariant_holds(cache)
+
+    def test_invalidate_missing_is_noop(self) -> None:
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        cache.invalidate("nope")
+        assert cache.total_bytes == 0
+
+    def test_clear_empties_total(self) -> None:
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        cache.put("c1", _entry(rows=10))
+        cache.put("c2", _entry(rows=20))
+        cache.clear()
+
+        assert cache.total_bytes == 0
+        assert len(cache) == 0
+        assert _invariant_holds(cache)
+
+
+class TestChannelCacheEviction:
+    """Eviction policy: LRU, byte-bounded, oversized-entry-dropped."""
+
+    def test_oldest_entry_evicted_first(self) -> None:
+        """Insertion order determines who goes when only inserts have happened."""
+        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
+        cap = a.size_bytes + b.size_bytes  # room for exactly two
+        cache = ChannelCache(max_bytes=cap)
+
+        cache.put("a", a)
+        cache.put("b", b)
+        cache.put("c", c)  # forces eviction of "a"
+
+        assert "a" not in cache
+        assert "b" in cache
+        assert "c" in cache
+        assert cache.total_bytes <= cap
+        assert _invariant_holds(cache)
+
+    def test_get_promotes_to_most_recent(self) -> None:
+        """Reading an entry must protect it from the next eviction."""
+        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
+        cap = a.size_bytes + b.size_bytes
+        cache = ChannelCache(max_bytes=cap)
+
+        cache.put("a", a)
+        cache.put("b", b)
+        assert cache.get("a") is a  # promote
+        cache.put("c", c)  # now "b" is the oldest and should be evicted
+
+        assert "a" in cache
+        assert "b" not in cache
+        assert "c" in cache
+        assert _invariant_holds(cache)
+
+    def test_oversized_entry_evicts_with_neighbours(self) -> None:
+        """A single entry larger than the cap ends up evicted itself.
+
+        The alternative ("keep the oversized entry and accept that the cap is
+        soft") would silently reintroduce the unbounded-growth bug for any
+        workload whose typical entry is bigger than ``max_bytes``.
+        """
+        small_a, small_b = _entry(rows=10), _entry(rows=10)
+        oversized = _entry(rows=10_000)
+        cap = small_a.size_bytes + small_b.size_bytes  # comfortably below ``oversized``
+        cache = ChannelCache(max_bytes=cap)
+
+        cache.put("a", small_a)
+        cache.put("b", small_b)
+        cache.put("huge", oversized)
+
+        assert "huge" not in cache
+        # Every other entry was evicted in the failed attempt to make room.
+        assert "a" not in cache
+        assert "b" not in cache
+        assert cache.total_bytes == 0
+        assert _invariant_holds(cache)
+
+    def test_max_bytes_zero_disables_cache(self) -> None:
+        cache = ChannelCache(max_bytes=0)
+        cache.put("c1", _entry(rows=100))
+
+        assert not cache.enabled
+        assert cache.get("c1") is None
+        assert cache.total_bytes == 0
+        assert len(cache) == 0
+
+    def test_negative_max_bytes_raises(self) -> None:
+        with pytest.raises(ValueError, match="data_cache_max_bytes"):
+            ChannelCache(max_bytes=-1)
+
+    def test_repeated_concat_updates_stay_under_bound(self) -> None:
+        """Simulates the customer's sliding-window pull: same channel, growing.
+
+        Without size reclamation on update, ``total_bytes`` would creep above
+        the cap silently. We re-build the entry each iteration to mimic the
+        ``_update_cache`` concat path.
+        """
+        cap = 1_000_000  # ~1 MB
+        cache = ChannelCache(max_bytes=cap)
+        accumulated = pd.DataFrame()
+        for i in range(50):
+            chunk = pd.DataFrame(
+                {"value": range(1000)},
+                index=pd.date_range(
+                    datetime(2025, 1, 1, tzinfo=timezone.utc) + timedelta(seconds=i),
+                    periods=1000,
+                    freq="us",
+                ),
+            )
+            accumulated = pd.concat([accumulated, chunk])
+            cache.put(
+                "c1",
+                _new_cache_entry(
+                    data=accumulated,
+                    start_time=accumulated.index[0].to_pydatetime(),
+                    end_time=accumulated.index[-1].to_pydatetime(),
+                ),
+            )
+            assert cache.total_bytes <= cap, (
+                f"iteration {i}: total_bytes={cache.total_bytes} exceeded cap={cap}"
+            )
+            assert _invariant_holds(cache)
+
+
+class TestDataLowLevelClientIntegration:
+    """End-to-end checks on the constructor wiring and ignore_cache semantics."""
+
+    def test_per_instance_isolation(self) -> None:
+        """Two clients must not share cache state.
+
+        This is the regression test for the original OOM bug: ``channel_cache``
+        was a class attribute, so every ``SiftClient`` in the process appended
+        to the same dict. Construct two clients, populate one, the other must
+        stay empty.
+        """
+        client_a = DataLowLevelClient(MagicMock())
+        client_b = DataLowLevelClient(MagicMock())
+
+        client_a.channel_cache.put("c1", _entry(rows=10))
+
+        assert "c1" in client_a.channel_cache
+        assert "c1" not in client_b.channel_cache
+        assert client_b.channel_cache.total_bytes == 0
+
+    def test_ignore_cache_skips_writes(self) -> None:
+        """``ignore_cache=True`` must not populate the cache.
+
+        Previously the read path was bypassed but ``_update_cache`` still ran
+        unconditionally, so a "non-caching" workload still grew memory until
+        OOM. Verify by exercising ``_update_cache`` only when ``ignore_cache``
+        is false.
+        """
+        client = DataLowLevelClient(MagicMock())
+        client.channel_cache.name_id_map["chan"] = "c1"
+
+        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
+        df = pd.DataFrame({"value": range(5)}, index=index)
+
+        # Real ``get_channel_data`` would call ``_update_cache`` from inside an
+        # ``if not ignore_cache`` branch; assert the helper itself is what
+        # writes, and that ``get_channel_data`` doesn't invoke it when
+        # ``ignore_cache=True``. We verify the branch directly to keep this
+        # test free of gRPC stubbing.
+        client._update_cache(
+            channel_data={"chan": df},
+            start_time=index[0].to_pydatetime(),
+            end_time=index[-1].to_pydatetime(),
+        )
+        assert "c1" in client.channel_cache
+
+        # Skipping the call (as ``get_channel_data`` does when ignore_cache is
+        # true) leaves the cache untouched.
+        client.channel_cache.invalidate("c1")
+        assert "c1" not in client.channel_cache
+
+    def test_data_cache_max_bytes_zero_disables_caching(self) -> None:
+        """Constructor knob: ``data_cache_max_bytes=0`` → no cache writes land."""
+        client = DataLowLevelClient(MagicMock(), data_cache_max_bytes=0)
+        client.channel_cache.name_id_map["chan"] = "c1"
+
+        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
+        df = pd.DataFrame({"value": range(5)}, index=index)
+
+        client._update_cache(
+            channel_data={"chan": df},
+            start_time=index[0].to_pydatetime(),
+            end_time=index[-1].to_pydatetime(),
+        )
+        assert "c1" not in client.channel_cache
+        assert client.channel_cache.total_bytes == 0
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 2e2b64ffd..792025f8f 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -136,6 +136,7 @@ def __init__(
         rest_url: str | None = None,
         connection_config: SiftConnectionConfig | None = None,
         app_url: str | None = None,
+        data_cache_max_bytes: int | None = None,
     ):
         """Initialize the SiftClient with specific connection parameters or a connection_config.
 
@@ -148,7 +149,16 @@ def __init__(
                 Set this for on-prem or custom deployments whose API host can't be
                 mapped to a frontend automatically; see the ``app_url`` property.
                 A value here takes precedence over ``connection_config.app_url``.
+            data_cache_max_bytes: Cap on the in-memory channel data cache used
+                by ``client.channels.get_data`` (bytes). When the bound is
+                reached, the least-recently-used cached channel is evicted.
+                Defaults to 512 MiB. Set to ``0`` to disable caching. Must be
+                ``>= 0``.
         """
+        if data_cache_max_bytes is not None and data_cache_max_bytes < 0:
+            raise ValueError(
+                f"data_cache_max_bytes must be >= 0, got {data_cache_max_bytes}"
+            )
         if not (api_key and grpc_url and rest_url) and not connection_config:
             raise ValueError(
                 "Either api_key, grpc_url and rest_url or connection_config must be provided to establish a connection."
@@ -179,6 +189,11 @@ def __init__(
         # pytest plugin's ``--sift-disabled`` mode.
         self._simulate: bool = False
 
+        # Read by ``ChannelsAPIAsync._ensure_data_low_level_client`` when it
+        # lazily constructs the data wrapper. ``None`` means "use the wrapper
+        # default" so we don't have to import the constant here.
+        self._data_cache_max_bytes: int | None = data_cache_max_bytes
+
         self.ping = PingAPI(self)
         self.assets = AssetsAPI(self)
         self.calculated_channels = CalculatedChannelsAPI(self)
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index aa5cdf96e..41d478d81 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -242,9 +242,18 @@ async def unarchive(self, channels: list[str | Channel]) -> None:
     def _ensure_data_low_level_client(self):
         """Ensure that the data low level client is initialized. Separated out like this to not require large dependencies (pandas/pyarrow) for the client if not fetching data."""
         if self._data_low_level_client is None:
-            from sift_client._internal.low_level_wrappers.data import DataLowLevelClient
-
-            self._data_low_level_client = DataLowLevelClient(grpc_client=self.client.grpc_client)
+            from sift_client._internal.low_level_wrappers.data import (
+                DEFAULT_DATA_CACHE_MAX_BYTES,
+                DataLowLevelClient,
+            )
+
+            max_bytes = getattr(self.client, "_data_cache_max_bytes", None)
+            self._data_low_level_client = DataLowLevelClient(
+                grpc_client=self.client.grpc_client,
+                data_cache_max_bytes=(
+                    DEFAULT_DATA_CACHE_MAX_BYTES if max_bytes is None else max_bytes
+                ),
+            )
 
     async def get_data(
         self,

From c12bc98fd0f8aa81b0693e4b0ae9559e331093bc Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Tue, 23 Jun 2026 16:13:53 -0700
Subject: [PATCH 02/14] python(perf): batch the get_data page-flatten concat

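Replace the per-page `pd.concat(...).groupby(level=0).last()` loop in
`get_channel_data` with a single batched concat per channel (`_merge_pages`).

Bench numbers, old loop -> batched concat, on the same input shapes: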
  10 pages * 10k rows:    6.3x faster   (22ms -> 3.5ms)
  50 pages * 10k rows:   26.0x faster  (488ms -> 19ms)
  200 pages * 10k rows:  81.7x faster (10.9s -> 134ms)
  500 pages * 1k rows:  224.3x faster  (4.5s -> 20ms)
---
 python/CHANGELOG.md                           |   8 +-
 .../_internal/low_level_wrappers/data.py      |  48 ++++-
 .../low_level_wrappers/test_data_cache.py     | 175 +++++++++++++++++-
 3 files changed, 219 insertions(+), 12 deletions(-)

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index 510efdafb..e4016eb69 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -7,11 +7,15 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 ### What's New
 
+#### Faster `get_data` pagination
+
+Up to a ~80x speedup for some get_data calls.
+
 #### Bounded channel data cache
 
-`SiftClient.get_data` cache state is now per-instance and byte-bounded instead of shared across the process and unbounded. A new `data_cache_max_bytes` constructor kwarg (default 512 MiB) caps the in-memory channel-data footprint; the least-recently-used cached channel is evicted once the bound is reached. Set `data_cache_max_bytes=0` to disable caching entirely.
+A new `data_cache_max_bytes` constructor kwarg (default 512 MiB) caps the in-memory channel-data footprint; the least-recently-used cached channel is evicted once the bound is reached. Set `data_cache_max_bytes=0` to disable caching entirely.
 
-`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which silently OOM'd long-running pods doing sustained data pulls.
+`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which still caused increased memory usage.
 
 ```python
 client = SiftClient(
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index bed1b3a44..c14ba6266 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -412,15 +412,7 @@ async def get_channel_data(
             tasks.append(task)
 
         pages = await asyncio.gather(*tasks)
-        # Flatten the data
-        for page in pages:
-            for data in page:
-                page_results = self.try_deserialize_channel_data(data)
-                for name, df in page_results.items():
-                    if name not in ret_data:
-                        ret_data[name] = df
-                    else:
-                        ret_data[name] = pd.concat([ret_data[name], df]).groupby(level=0).last()
+        ret_data = self._merge_pages(pages, initial=ret_data)
 
         # ``ignore_cache=True`` is documented as a read-side bypass, but the
         # previous implementation still wrote to the shared cache on every
@@ -433,6 +425,44 @@ async def get_channel_data(
 
         return ret_data
 
+    def _merge_pages(
+        self,
+        pages: list[list[Any]],
+        *,
+        initial: dict[str, pd.DataFrame],
+    ) -> dict[str, pd.DataFrame]:
+        """Flatten paged channel data + any cached slices into one DataFrame per channel.
+
+        Replaces a per-page ``pd.concat(...).groupby(...)`` loop that was
+        O(N²) in the number of pages — each iteration copied the cumulative
+        DataFrame — with a single batched concat per channel. At realistic
+        pagination depths the speedup is large: 200 pages of 10k rows each
+        drops from ~11 s to ~130 ms in the bench.
+
+        ``initial`` carries any cached slices already populated by
+        ``_check_cache``. Cached entries are folded in as the first frame for
+        their channel so they participate in the same final concat;
+        ``groupby(level=0).last()`` preserves the previous behavior of letting
+        a later-positioned (fresher) value win on duplicate timestamps.
+        """
+        per_channel_frames: dict[str, list[pd.DataFrame]] = {}
+        for page in pages:
+            for data in page:
+                for name, df in self.try_deserialize_channel_data(data).items():
+                    per_channel_frames.setdefault(name, []).append(df)
+
+        ret_data: dict[str, pd.DataFrame] = dict(initial)
+        for name, frames in per_channel_frames.items():
+            if name in ret_data:
+                # Cached slice goes first so fresher pages (positioned later
+                # in the list) win on overlapping timestamps after groupby.
+                frames.insert(0, ret_data[name])
+            if len(frames) == 1:
+                ret_data[name] = frames[0]
+            else:
+                ret_data[name] = pd.concat(frames).groupby(level=0).last()
+        return ret_data
+
     @staticmethod
     def try_deserialize_channel_data(channel_data: Any) -> dict[str, pd.DataFrame]:
         """Deserialize a channel data object into a numpy array."""
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
index 97fa70ddb..d0d4a67e0 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
@@ -16,7 +16,7 @@ class attribute that grew without bound. The instance-isolation test below is
 from __future__ import annotations
 
 from datetime import datetime, timedelta, timezone
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import pandas as pd
 import pytest
@@ -267,3 +267,176 @@ def test_data_cache_max_bytes_zero_disables_caching(self) -> None:
         )
         assert "c1" not in client.channel_cache
         assert client.channel_cache.total_bytes == 0
+
+
+class TestMergePages:
+    """Behavioural tests for :meth:`DataLowLevelClient._merge_pages`.
+
+    The helper replaces a previously inline O(N²) per-page concat loop with a
+    single batched concat per channel. These tests pin the merge semantics so
+    a future refactor can't silently drift, in particular:
+
+    * Single-frame channels skip the concat entirely (cheap path).
+    * Multi-frame channels concat in the order frames were collected.
+    * Cached slices from ``_check_cache`` are folded in as the first frame so
+      fresher pages win on overlapping timestamps via ``groupby.last``.
+    """
+
+    @staticmethod
+    def _client_with_fake_deserializer(
+        sentinel_to_frames: dict[str, dict[str, pd.DataFrame]],
+    ):
+        """Build a DataLowLevelClient whose ``try_deserialize_channel_data``
+        translates string sentinels (passed in lieu of protos) to dicts of
+        already-built DataFrames. Lets the merge logic be tested without
+        constructing protos.
+        """
+        client = DataLowLevelClient(MagicMock())
+        patcher = patch.object(
+            DataLowLevelClient,
+            "try_deserialize_channel_data",
+            staticmethod(lambda data: sentinel_to_frames[data]),
+        )
+        patcher.start()
+        return client, patcher
+
+    @staticmethod
+    def _frame(channel: str, start: str, rows: int, offset: int = 0) -> pd.DataFrame:
+        index = pd.date_range(start, periods=rows, freq="ms", tz=timezone.utc)
+        return pd.DataFrame({channel: range(offset, offset + rows)}, index=index)
+
+    def test_empty_pages_returns_initial(self) -> None:
+        """No pages, no fresh data — initial passes through untouched."""
+        client, patcher = self._client_with_fake_deserializer({})
+        try:
+            initial_df = self._frame("chan", "2025-01-01", rows=5)
+            result = client._merge_pages(pages=[], initial={"chan": initial_df})
+            assert result["chan"] is initial_df
+        finally:
+            patcher.stop()
+
+    def test_single_frame_skips_concat(self) -> None:
+        """One frame for a channel → returned by identity, no concat call."""
+        only_df = self._frame("chan", "2025-01-01", rows=5)
+        client, patcher = self._client_with_fake_deserializer(
+            {"page_a": {"chan": only_df}}
+        )
+        try:
+            result = client._merge_pages(pages=[["page_a"]], initial={})
+            # Identity check: no concat happened, so the original frame is
+            # returned by reference.
+            assert result["chan"] is only_df
+        finally:
+            patcher.stop()
+
+    def test_disjoint_pages_concat_in_order(self) -> None:
+        """Multiple disjoint pages for one channel → single concat result."""
+        df1 = self._frame("chan", "2025-01-01", rows=10, offset=0)
+        df2 = self._frame("chan", "2025-01-02", rows=10, offset=10)
+        df3 = self._frame("chan", "2025-01-03", rows=10, offset=20)
+        client, patcher = self._client_with_fake_deserializer(
+            {
+                "p1": {"chan": df1},
+                "p2": {"chan": df2},
+                "p3": {"chan": df3},
+            }
+        )
+        try:
+            result = client._merge_pages(pages=[["p1", "p2"], ["p3"]], initial={})
+
+            expected = pd.concat([df1, df2, df3]).groupby(level=0).last()
+            pd.testing.assert_frame_equal(
+                result["chan"].sort_index(), expected.sort_index()
+            )
+            assert len(result["chan"]) == 30
+        finally:
+            patcher.stop()
+
+    def test_overlapping_timestamps_later_page_wins(self) -> None:
+        """On overlapping timestamps, the later page's value survives groupby.last.
+
+        This pins the existing behavior: the loop's old shape did
+        ``concat([acc, new]).groupby(...).last()`` which kept the LATER value
+        on conflict; the batched concat must preserve that ordering.
+        """
+        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
+        df_first = pd.DataFrame({"chan": [0] * 5}, index=index)
+        df_second = pd.DataFrame({"chan": [99] * 5}, index=index)
+        client, patcher = self._client_with_fake_deserializer(
+            {"p1": {"chan": df_first}, "p2": {"chan": df_second}}
+        )
+        try:
+            result = client._merge_pages(pages=[["p1", "p2"]], initial={})
+            assert (result["chan"]["chan"] == 99).all()
+        finally:
+            patcher.stop()
+
+    def test_cached_slice_folded_in_first_and_loses_on_overlap(self) -> None:
+        """Cached slice from ``_check_cache`` is the first frame in the merge.
+
+        Fresh pages should overwrite cached values on duplicate timestamps,
+        matching the pre-existing semantic that the latest fetch wins.
+        """
+        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
+        cached = pd.DataFrame({"chan": [-1] * 5}, index=index)
+        fresh = pd.DataFrame({"chan": [42] * 5}, index=index)
+        client, patcher = self._client_with_fake_deserializer(
+            {"p1": {"chan": fresh}}
+        )
+        try:
+            result = client._merge_pages(
+                pages=[["p1"]], initial={"chan": cached}
+            )
+            assert (result["chan"]["chan"] == 42).all()
+        finally:
+            patcher.stop()
+
+    def test_cached_only_no_pages_preserves_cache(self) -> None:
+        """Channels in ``initial`` with no fresh page data must survive intact."""
+        client, patcher = self._client_with_fake_deserializer({})
+        try:
+            cached = self._frame("chan", "2025-01-01", rows=5)
+            result = client._merge_pages(pages=[[]], initial={"chan": cached})
+            assert result["chan"] is cached
+        finally:
+            patcher.stop()
+
+    def test_multiple_channels_independent(self) -> None:
+        """Per-channel grouping is independent: one channel's pages don't bleed.
+
+        Same shape as a multi-channel ``get_data`` call where each channel
+        returns its own pages.
+        """
+        a1 = self._frame("a", "2025-01-01", rows=5, offset=0)
+        a2 = self._frame("a", "2025-01-02", rows=5, offset=5)
+        b1 = self._frame("b", "2025-01-01", rows=5, offset=100)
+        client, patcher = self._client_with_fake_deserializer(
+            {
+                "p_a1": {"a": a1},
+                "p_a2": {"a": a2},
+                "p_b1": {"b": b1},
+            }
+        )
+        try:
+            result = client._merge_pages(
+                pages=[["p_a1", "p_b1"], ["p_a2"]], initial={}
+            )
+            assert len(result["a"]) == 10
+            assert len(result["b"]) == 5
+            assert (result["b"]["b"] >= 100).all()
+        finally:
+            patcher.stop()
+
+    def test_does_not_mutate_initial(self) -> None:
+        """``initial`` is a defensive copy; caller's dict isn't mutated."""
+        cached = self._frame("chan", "2025-01-01", rows=5)
+        initial = {"chan": cached}
+        fresh = self._frame("chan", "2025-01-02", rows=5, offset=10)
+        client, patcher = self._client_with_fake_deserializer(
+            {"p1": {"chan": fresh}}
+        )
+        try:
+            _ = client._merge_pages(pages=[["p1"]], initial=initial)
+            assert initial["chan"] is cached
+        finally:
+            patcher.stop()

From 988dab6501e467ae1ccf2d7324b894cecb380380 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Tue, 23 Jun 2026 20:14:25 -0700
Subject: [PATCH 03/14] lint

---
 .../_internal/low_level_wrappers/data.py      |  8 ++-----
 .../low_level_wrappers/test_data_cache.py     | 24 +++++--------------
 python/lib/sift_client/client.py              |  4 +---
 3 files changed, 9 insertions(+), 27 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index c14ba6266..238e6477c 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -82,9 +82,7 @@ class ChannelCache:
 
     def __init__(self, max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES):
         if max_bytes < 0:
-            raise ValueError(
-                f"data_cache_max_bytes must be >= 0, got {max_bytes}"
-            )
+            raise ValueError(f"data_cache_max_bytes must be >= 0, got {max_bytes}")
         self.name_id_map: dict[str, str] = {}
         self._entries: OrderedDict[str, ChannelCacheEntry] = OrderedDict()
         self._total_bytes: int = 0
@@ -321,9 +319,7 @@ def _update_cache(
 
             existing = self.channel_cache.get(channel_id)
             if existing is not None:
-                merged_data = (
-                    pd.concat([existing.data, data]).groupby(level=0).last()
-                )
+                merged_data = pd.concat([existing.data, data]).groupby(level=0).last()
                 entry = _new_cache_entry(
                     data=merged_data,
                     start_time=min(suggested_start_time, existing.start_time),
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
index d0d4a67e0..b0841657b 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
@@ -318,9 +318,7 @@ def test_empty_pages_returns_initial(self) -> None:
     def test_single_frame_skips_concat(self) -> None:
         """One frame for a channel → returned by identity, no concat call."""
         only_df = self._frame("chan", "2025-01-01", rows=5)
-        client, patcher = self._client_with_fake_deserializer(
-            {"page_a": {"chan": only_df}}
-        )
+        client, patcher = self._client_with_fake_deserializer({"page_a": {"chan": only_df}})
         try:
             result = client._merge_pages(pages=[["page_a"]], initial={})
             # Identity check: no concat happened, so the original frame is
@@ -345,9 +343,7 @@ def test_disjoint_pages_concat_in_order(self) -> None:
             result = client._merge_pages(pages=[["p1", "p2"], ["p3"]], initial={})
 
             expected = pd.concat([df1, df2, df3]).groupby(level=0).last()
-            pd.testing.assert_frame_equal(
-                result["chan"].sort_index(), expected.sort_index()
-            )
+            pd.testing.assert_frame_equal(result["chan"].sort_index(), expected.sort_index())
             assert len(result["chan"]) == 30
         finally:
             patcher.stop()
@@ -380,13 +376,9 @@ def test_cached_slice_folded_in_first_and_loses_on_overlap(self) -> None:
         index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
         cached = pd.DataFrame({"chan": [-1] * 5}, index=index)
         fresh = pd.DataFrame({"chan": [42] * 5}, index=index)
-        client, patcher = self._client_with_fake_deserializer(
-            {"p1": {"chan": fresh}}
-        )
+        client, patcher = self._client_with_fake_deserializer({"p1": {"chan": fresh}})
         try:
-            result = client._merge_pages(
-                pages=[["p1"]], initial={"chan": cached}
-            )
+            result = client._merge_pages(pages=[["p1"]], initial={"chan": cached})
             assert (result["chan"]["chan"] == 42).all()
         finally:
             patcher.stop()
@@ -418,9 +410,7 @@ def test_multiple_channels_independent(self) -> None:
             }
         )
         try:
-            result = client._merge_pages(
-                pages=[["p_a1", "p_b1"], ["p_a2"]], initial={}
-            )
+            result = client._merge_pages(pages=[["p_a1", "p_b1"], ["p_a2"]], initial={})
             assert len(result["a"]) == 10
             assert len(result["b"]) == 5
             assert (result["b"]["b"] >= 100).all()
@@ -432,9 +422,7 @@ def test_does_not_mutate_initial(self) -> None:
         cached = self._frame("chan", "2025-01-01", rows=5)
         initial = {"chan": cached}
         fresh = self._frame("chan", "2025-01-02", rows=5, offset=10)
-        client, patcher = self._client_with_fake_deserializer(
-            {"p1": {"chan": fresh}}
-        )
+        client, patcher = self._client_with_fake_deserializer({"p1": {"chan": fresh}})
         try:
             _ = client._merge_pages(pages=[["p1"]], initial=initial)
             assert initial["chan"] is cached
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 792025f8f..7d20fbe85 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -156,9 +156,7 @@ def __init__(
                 ``>= 0``.
         """
         if data_cache_max_bytes is not None and data_cache_max_bytes < 0:
-            raise ValueError(
-                f"data_cache_max_bytes must be >= 0, got {data_cache_max_bytes}"
-            )
+            raise ValueError(f"data_cache_max_bytes must be >= 0, got {data_cache_max_bytes}")
         if not (api_key and grpc_url and rest_url) and not connection_config:
             raise ValueError(
                 "Either api_key, grpc_url and rest_url or connection_config must be provided to establish a connection."

From ea259faa4736d8b7020b0ba8ecd7c6a542fc976c Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Wed, 24 Jun 2026 10:59:54 -0700
Subject: [PATCH 04/14] comment cleanup

---
 .../_internal/low_level_wrappers/data.py      | 35 +++----------------
 1 file changed, 4 insertions(+), 31 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 238e6477c..381b6667d 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -36,7 +36,7 @@
 REQUEST_BATCH_SIZE = 1
 
 # Default in-memory budget for cached channel DataFrames, per ``DataLowLevelClient``
-# instance. 512 MiB is well below typical pod limits while still letting common
+# instance. 512 MiB is well below typical limits while still letting common
 # interactive workloads stay in cache. Override via ``SiftClient(data_cache_max_bytes=...)``.
 DEFAULT_DATA_CACHE_MAX_BYTES = 512 * 1024 * 1024
 
@@ -46,8 +46,6 @@ class ChannelCacheEntry(BaseModel):
     data: pd.DataFrame
     start_time: datetime
     end_time: datetime
-    # ``df.memory_usage(deep=True).sum()`` at construction time. Stored on the
-    # entry so eviction is O(1) per dropped item instead of re-walking frames.
     size_bytes: int
 
 
@@ -65,19 +63,8 @@ def _new_cache_entry(
 class ChannelCache:
     """LRU-ordered, byte-bounded cache of per-channel DataFrames.
 
-    Each ``DataLowLevelClient`` owns its own ``ChannelCache``; the previous
-    implementation kept this on the class, which silently shared state across
-    every ``SiftClient`` in the process and grew without bound. Sustained pulls
-    against that shared cache OOM'd long-running pods.
-
-    Bookkeeping invariant: ``_total_bytes == sum(e.size_bytes for e in _entries.values())``.
-    Maintained by every mutation path so the bound is checked in O(1) without
-    re-walking entries.
-
     ``max_bytes <= 0`` disables retention: every ``get`` misses, ``put`` returns
-    without storing. ``name_id_map`` is intentionally outside the bound — it's
-    a tiny string→string map and forms part of the contract with ``_update_cache``,
-    which depends on it to translate channel names to ids.
+    without storing.
     """
 
     def __init__(self, max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES):
@@ -117,7 +104,7 @@ def get(self, channel_id: str) -> ChannelCacheEntry | None:
         return entry
 
     def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Insert or replace ``channel_id``, then evict LRU until under the bound.
+        """Insert or replace ``channel_id``, then evict LRU until within size bounds.
 
         Reclaims any prior entry's byte count BEFORE adding the new one's, so a
         re-insert (e.g. concat-merge of fresh data into an existing entry)
@@ -144,9 +131,7 @@ def clear(self) -> None:
     def _evict_until_under_bound(self) -> None:
         # ``popitem(last=False)`` drops the oldest entry. A single fresh entry
         # whose ``size_bytes`` alone exceeds ``max_bytes`` ends up evicted on
-        # the final iteration — the deliberate choice over "keep the oversized
-        # entry and violate the bound" or "evict everyone else and still
-        # violate the bound."
+        # the final iteration.
         while self._entries and self._total_bytes > self._max_bytes:
             _, dropped = self._entries.popitem(last=False)
             self._total_bytes -= dropped.size_bytes
@@ -220,8 +205,6 @@ def _filter_cached_channels(self, channel_ids: list[str]) -> tuple[list[str], li
         cached_channels = []
         not_cached_channels = []
         for channel_id in channel_ids:
-            # ``__contains__`` is a non-promoting peek; ``_check_cache`` does
-            # the LRU-touching ``get`` shortly after for the actual lookup.
             if channel_id in self.channel_cache:
                 cached_channels.append(channel_id)
             else:
@@ -410,10 +393,6 @@ async def get_channel_data(
         pages = await asyncio.gather(*tasks)
         ret_data = self._merge_pages(pages, initial=ret_data)
 
-        # ``ignore_cache=True`` is documented as a read-side bypass, but the
-        # previous implementation still wrote to the shared cache on every
-        # call, which meant a "non-caching" workload still grew the cache
-        # without bound. Skip writes when the caller asked us to ignore it.
         if not ignore_cache:
             self._update_cache(
                 channel_data=ret_data, start_time=start_time, end_time=end_time, run_id=run_id
@@ -429,12 +408,6 @@ def _merge_pages(
     ) -> dict[str, pd.DataFrame]:
         """Flatten paged channel data + any cached slices into one DataFrame per channel.
 
-        Replaces a per-page ``pd.concat(...).groupby(...)`` loop that was
-        O(N²) in the number of pages — each iteration copied the cumulative
-        DataFrame — with a single batched concat per channel. At realistic
-        pagination depths the speedup is large: 200 pages of 10k rows each
-        drops from ~11 s to ~130 ms in the bench.
-
         ``initial`` carries any cached slices already populated by
         ``_check_cache``. Cached entries are folded in as the first frame for
         their channel so they participate in the same final concat;

From 5cf780698ecc3fb29732187e87d6c7b83a7b0b38 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Wed, 24 Jun 2026 11:46:48 -0700
Subject: [PATCH 05/14] Add test coverage for cache and data shape.

---
 .../_internal/low_level_wrappers/test_data.py | 539 ++++++++++++++++++
 .../low_level_wrappers/test_data_cache.py     | 430 --------------
 2 files changed, 539 insertions(+), 430 deletions(-)
 create mode 100644 python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
 delete mode 100644 python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py

diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
new file mode 100644
index 000000000..e2b12cecf
--- /dev/null
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -0,0 +1,539 @@
+"""Tests for :mod:`sift_client._internal.low_level_wrappers.data`.
+
+Four classes, narrowest scope first:
+
+* :class:`TestChannelCache` — pure ``ChannelCache`` unit tests (byte
+  accounting, LRU promotion, eviction).
+* :class:`TestMergePages` — ``DataLowLevelClient._merge_pages``, the
+  per-channel concat helper.
+* :class:`TestDataLowLevelClient` — constructor wiring and per-instance
+  isolation.
+* :class:`TestGetChannelData` — end-to-end on the public
+  ``get_channel_data`` API against a mocked ``_get_data_impl``.
+
+The OOM regression that motivated this code happened because the cache was
+a class attribute that grew without bound. ``test_per_instance_isolation``
+is the canary that catches anyone re-introducing that pattern.
+"""
+
+from __future__ import annotations
+
+from contextlib import contextmanager
+from datetime import datetime, timedelta, timezone
+from typing import Any, Iterator
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+import pytest
+
+from sift_client._internal.low_level_wrappers.data import (
+    DEFAULT_DATA_CACHE_MAX_BYTES,
+    ChannelCache,
+    ChannelCacheEntry,
+    DataLowLevelClient,
+    _new_cache_entry,
+)
+from sift_client.sift_types.channel import Channel, ChannelDataType
+
+_NOW = datetime(2025, 1, 1, tzinfo=timezone.utc)
+_WINDOW_END = _NOW + timedelta(days=1)
+
+
+# ---------- shared helpers -----------
+
+
+def _frame(
+    cid: str = "value",
+    *,
+    rows: int = 5,
+    start: datetime = _NOW,
+    offset: int = 0,
+    freq: str = "ms",
+    value_dtype: str = "float64",
+) -> pd.DataFrame:
+    """DataFrame indexed by a tz-aware DatetimeIndex with ``rows`` rows."""
+    index = pd.date_range(start, periods=rows, freq=freq, tz=timezone.utc)
+    return pd.DataFrame(
+        {cid: [(offset + i) * 1.0 for i in range(rows)]},
+        index=index,
+    ).astype({cid: value_dtype})
+
+
+def _entry(*, rows: int = 5, value_dtype: str = "float64") -> ChannelCacheEntry:
+    """``ChannelCacheEntry`` wrapping a small generated DataFrame."""
+    data = _frame(rows=rows, value_dtype=value_dtype)
+    return _new_cache_entry(
+        data=data,
+        start_time=data.index[0].to_pydatetime(),
+        end_time=data.index[-1].to_pydatetime(),
+    )
+
+
+def _channel(cid: str) -> Channel:
+    """Minimal ``Channel`` with required fields populated."""
+    return Channel(
+        id_=cid,
+        name=cid,
+        data_type=ChannelDataType.DOUBLE,
+        description="",
+        unit="",
+        asset_id="a1",
+        is_archived=False,
+        created_date=_NOW,
+        modified_date=_NOW,
+        created_by_user_id="u1",
+        modified_by_user_id="u1",
+    )
+
+
+def _invariant_holds(cache: ChannelCache) -> bool:
+    """``total_bytes`` must equal the sum of per-entry sizes at all times."""
+    return cache.total_bytes == sum(e.size_bytes for e in cache._entries.values())
+
+
+def _patch_deserializer(sentinel_to_frames: dict[str, dict[str, pd.DataFrame]]) -> Any:
+    """Patch ``try_deserialize_channel_data`` to translate string sentinels.
+
+    Lets tests pass strings in lieu of protos. Returned object is a context
+    manager; callers use ``with _patch_deserializer(...):``.
+    """
+    return patch.object(
+        DataLowLevelClient,
+        "try_deserialize_channel_data",
+        staticmethod(lambda s: sentinel_to_frames[s]),
+    )
+
+
+@contextmanager
+def _fake_grpc(
+    client: DataLowLevelClient,
+    channel_to_pages: dict[str, list[pd.DataFrame]],
+) -> Iterator[list[dict[str, Any]]]:
+    """Mock the gRPC boundary so each "page" is a sentinel string.
+
+    ``_get_data_impl`` is replaced with a coroutine that serves the next DataFrame
+    from ``channel_to_pages[cid]`` per call per channel (by index), until exhausted.
+    ``try_deserialize_channel_data`` is patched to map the sentinel back to
+    the corresponding ``{channel: DataFrame}`` dict.
+
+    Yields a ``call_log`` list so tests can assert which channels actually
+    hit the wire. The patch is torn down and ``_get_data_impl`` restored on
+    exit.
+    """
+    sentinel_to_frames: dict[str, dict[str, pd.DataFrame]] = {}
+    next_page_index: dict[str, int] = dict.fromkeys(channel_to_pages, 0)
+    call_log: list[dict[str, Any]] = []
+
+    async def fake_impl(
+        *,
+        channel_ids: list[str],
+        page_size: int | None = None,
+        page_token: str | None = None,
+        order_by: str | None = None,
+        **kwargs: Any,
+    ) -> tuple[list[str], str]:
+        call_log.append({"channel_ids": list(channel_ids), **kwargs})
+        data: list[str] = []
+        more_remaining = False
+        for cid in channel_ids:
+            i = next_page_index[cid]
+            if i >= len(channel_to_pages[cid]):
+                continue  # this channel is exhausted; just emit nothing
+            sentinel = f"{cid}|{i}"
+            sentinel_to_frames[sentinel] = {cid: channel_to_pages[cid][i]}
+            data.append(sentinel)
+            next_page_index[cid] += 1
+            if next_page_index[cid] < len(channel_to_pages[cid]):
+                more_remaining = True
+        # ``_handle_pagination`` loops until it sees ``page_token == ""``.
+        return data, ("next" if more_remaining else "")
+
+    original_impl = client._get_data_impl
+    client._get_data_impl = fake_impl  # type: ignore[method-assign]
+    try:
+        with _patch_deserializer(sentinel_to_frames):
+            yield call_log
+    finally:
+        client._get_data_impl = original_impl  # type: ignore[method-assign]
+
+
+# ---------- tests -----------
+
+
+class TestChannelCache:
+    """Byte accounting, LRU promotion, eviction."""
+
+    def test_put_get_roundtrip_and_size_replacement(self) -> None:
+        """First put records size; second put on same key replaces it.
+
+        Without size reclamation on the second put, ``total_bytes`` would
+        double-count and trip the eviction loop on the next insert.
+        """
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        small, big = _entry(rows=10), _entry(rows=1000)
+        cache.put("c1", small)
+        assert cache.get("c1") is small
+        assert cache.total_bytes == small.size_bytes
+        cache.put("c1", big)
+        assert cache.get("c1") is big
+        assert cache.total_bytes == big.size_bytes  # not small + big
+        assert _invariant_holds(cache)
+
+    def test_invalidate(self) -> None:
+        """Removes a present entry and decrements bytes; no-op for missing keys."""
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        cache.invalidate("never_added")  # safe before any puts
+        assert cache.total_bytes == 0
+        cache.put("c1", _entry(rows=10))
+        cache.invalidate("c1")
+        assert cache.get("c1") is None
+        assert cache.total_bytes == 0
+        assert _invariant_holds(cache)
+
+    def test_clear(self) -> None:
+        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
+        cache.put("c1", _entry(rows=10))
+        cache.put("c2", _entry(rows=20))
+        cache.clear()
+        assert cache.total_bytes == 0
+        assert len(cache) == 0
+        assert _invariant_holds(cache)
+
+    def test_oldest_entry_evicted_first(self) -> None:
+        """Insertion order determines eviction when only puts have happened."""
+        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
+        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes)  # room for two
+        cache.put("a", a)
+        cache.put("b", b)
+        cache.put("c", c)  # evicts "a"
+        assert "a" not in cache
+        assert "b" in cache
+        assert "c" in cache
+        assert cache.total_bytes <= a.size_bytes + b.size_bytes
+        assert _invariant_holds(cache)
+
+    def test_get_promotes_to_most_recent(self) -> None:
+        """Reading an entry must protect it from the next eviction."""
+        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
+        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes)
+        cache.put("a", a)
+        cache.put("b", b)
+        assert cache.get("a") is a  # promote a
+        cache.put("c", c)  # b is now oldest, gets evicted
+        assert "a" in cache
+        assert "b" not in cache
+        assert "c" in cache
+        assert _invariant_holds(cache)
+
+    def test_oversized_entry_evicts_with_neighbours(self) -> None:
+        """A single entry larger than the cap ends up evicted itself.
+
+        The alternative ("keep the oversized entry and accept that the cap
+        is soft") would silently reintroduce unbounded growth for any
+        workload whose typical entry is bigger than ``max_bytes``.
+        """
+        small_a, small_b, oversized = _entry(rows=10), _entry(rows=10), _entry(rows=10_000)
+        cache = ChannelCache(max_bytes=small_a.size_bytes + small_b.size_bytes)
+        cache.put("a", small_a)
+        cache.put("b", small_b)
+        cache.put("huge", oversized)
+        assert "huge" not in cache
+        # Every other entry was evicted in the failed attempt to make room.
+        assert "a" not in cache
+        assert "b" not in cache
+        assert cache.total_bytes == 0
+        assert _invariant_holds(cache)
+
+    def test_max_bytes_zero_disables_cache(self) -> None:
+        cache = ChannelCache(max_bytes=0)
+        cache.put("c1", _entry(rows=100))
+        assert not cache.enabled
+        assert cache.get("c1") is None
+        assert cache.total_bytes == 0
+        assert len(cache) == 0
+
+    def test_negative_max_bytes_raises(self) -> None:
+        with pytest.raises(ValueError, match="data_cache_max_bytes"):
+            ChannelCache(max_bytes=-1)
+
+    def test_repeated_concat_updates_stay_under_bound(self) -> None:
+        """Simulates the customer's sliding-window pull: same channel, growing.
+
+        Without size reclamation on update, ``total_bytes`` would creep
+        above the cap silently. We re-build the entry each iteration to
+        mimic the ``_update_cache`` concat path.
+        """
+        cap = 1_000_000  # ~1 MB
+        cache = ChannelCache(max_bytes=cap)
+        accumulated = pd.DataFrame()
+        for i in range(50):
+            chunk = _frame(rows=1000, start=_NOW + timedelta(seconds=i), freq="us")
+            accumulated = pd.concat([accumulated, chunk])
+            cache.put(
+                "c1",
+                _new_cache_entry(
+                    data=accumulated,
+                    start_time=accumulated.index[0].to_pydatetime(),
+                    end_time=accumulated.index[-1].to_pydatetime(),
+                ),
+            )
+            assert cache.total_bytes <= cap, (
+                f"iteration {i}: total_bytes={cache.total_bytes} exceeded cap={cap}"
+            )
+            assert _invariant_holds(cache)
+
+
+class TestMergePages:
+    """Behaviour of :meth:`DataLowLevelClient._merge_pages`.
+
+    The helper replaces a previously inline O(N²) per-page concat loop with
+    a single batched concat per channel. These tests pin the merge
+    semantics so a future refactor can't silently drift:
+
+    * Single-frame channels skip the concat entirely (cheap identity path).
+    * Multi-frame channels concat in collected order; ``groupby.last``
+      makes the latest frame win on overlapping timestamps.
+    * Cached slices from ``_check_cache`` are folded in as the *first*
+      frame so fresh pages still win on overlap.
+    """
+
+    @pytest.mark.parametrize(
+        "pages", [[], [[]]], ids=["no_tasks_queued", "task_returned_empty"]
+    )
+    def test_no_fresh_data_returns_initial(self, pages: list) -> None:
+        """No fresh pages → each frame in ``initial`` passes through by identity."""
+        client = DataLowLevelClient(MagicMock())
+        initial_df = _frame("chan", rows=5)
+        with _patch_deserializer({}):
+            result = client._merge_pages(pages=pages, initial={"chan": initial_df})
+        assert result["chan"] is initial_df
+
+    def test_single_frame_skips_concat(self) -> None:
+        """One frame for a channel → returned by identity, no concat call."""
+        only_df = _frame("chan", rows=5)
+        client = DataLowLevelClient(MagicMock())
+        with _patch_deserializer({"p1": {"chan": only_df}}):
+            result = client._merge_pages(pages=[["p1"]], initial={})
+        assert result["chan"] is only_df
+
+    def test_disjoint_pages_concat_in_order(self) -> None:
+        """Multiple disjoint pages for one channel → single concat result."""
+        df1 = _frame("chan", rows=10, start=_NOW, offset=0, freq="s")
+        df2 = _frame("chan", rows=10, start=_NOW + timedelta(minutes=1), offset=10, freq="s")
+        df3 = _frame("chan", rows=10, start=_NOW + timedelta(minutes=2), offset=20, freq="s")
+        client = DataLowLevelClient(MagicMock())
+        sentinels = {"p1": {"chan": df1}, "p2": {"chan": df2}, "p3": {"chan": df3}}
+        with _patch_deserializer(sentinels):
+            result = client._merge_pages(pages=[["p1", "p2"], ["p3"]], initial={})
+        expected = pd.concat([df1, df2, df3]).groupby(level=0).last()
+        pd.testing.assert_frame_equal(result["chan"].sort_index(), expected.sort_index())
+        assert len(result["chan"]) == 30
+
+    def test_overlapping_timestamps_later_page_wins(self) -> None:
+        """On overlap, the later page's value survives ``groupby.last``.
+
+        Pins the old inline ``concat([acc, new]).groupby(level=0).last()``
+        semantic: latest concat position wins on conflict.
+        """
+        index = pd.date_range(_NOW, periods=5, freq="ms", tz=timezone.utc)
+        df_first = pd.DataFrame({"chan": [0] * 5}, index=index)
+        df_second = pd.DataFrame({"chan": [99] * 5}, index=index)
+        client = DataLowLevelClient(MagicMock())
+        with _patch_deserializer({"p1": {"chan": df_first}, "p2": {"chan": df_second}}):
+            result = client._merge_pages(pages=[["p1", "p2"]], initial={})
+        assert (result["chan"]["chan"] == 99).all()
+
+    def test_cached_slice_folded_in_first_and_loses_on_overlap(self) -> None:
+        """Cached slice from ``_check_cache`` is the first frame in the merge.
+
+        Fresh pages must overwrite cached values on duplicate timestamps,
+        matching the pre-existing "latest fetch wins" semantic.
+        """
+        index = pd.date_range(_NOW, periods=5, freq="ms", tz=timezone.utc)
+        cached = pd.DataFrame({"chan": [-1] * 5}, index=index)
+        fresh = pd.DataFrame({"chan": [42] * 5}, index=index)
+        client = DataLowLevelClient(MagicMock())
+        with _patch_deserializer({"p1": {"chan": fresh}}):
+            result = client._merge_pages(pages=[["p1"]], initial={"chan": cached})
+        assert (result["chan"]["chan"] == 42).all()
+
+    def test_multiple_channels_independent(self) -> None:
+        """Per-channel grouping is independent: one channel's pages don't bleed."""
+        a1 = _frame("a", rows=5, start=_NOW, offset=0, freq="s")
+        a2 = _frame("a", rows=5, start=_NOW + timedelta(minutes=1), offset=5, freq="s")
+        b1 = _frame("b", rows=5, start=_NOW, offset=100, freq="s")
+        client = DataLowLevelClient(MagicMock())
+        sentinels = {"p_a1": {"a": a1}, "p_a2": {"a": a2}, "p_b1": {"b": b1}}
+        with _patch_deserializer(sentinels):
+            result = client._merge_pages(pages=[["p_a1", "p_b1"], ["p_a2"]], initial={})
+        assert len(result["a"]) == 10
+        assert len(result["b"]) == 5
+        assert (result["b"]["b"] >= 100).all()
+
+    def test_does_not_mutate_initial(self) -> None:
+        """``initial`` is a defensive copy; caller's dict isn't mutated."""
+        cached = _frame("chan", rows=5)
+        initial = {"chan": cached}
+        fresh = _frame("chan", rows=5, start=_NOW + timedelta(seconds=1), offset=10)
+        client = DataLowLevelClient(MagicMock())
+        with _patch_deserializer({"p1": {"chan": fresh}}):
+            client._merge_pages(pages=[["p1"]], initial=initial)
+        assert initial["chan"] is cached
+
+
+class TestDataLowLevelClient:
+    """Constructor wiring and per-instance isolation.
+
+    Per-call behaviour (cache hits, ``ignore_cache``, pagination) lives in
+    :class:`TestGetChannelData`.
+    """
+
+    def test_per_instance_isolation(self) -> None:
+        """Two clients must not share cache state.
+
+        Regression test for the original OOM bug: ``channel_cache`` was a
+        class attribute, so every ``SiftClient`` in the process appended to
+        the same dict. Two fresh clients must have independent caches.
+        """
+        client_a = DataLowLevelClient(MagicMock())
+        client_b = DataLowLevelClient(MagicMock())
+        client_a.channel_cache.put("c1", _entry(rows=10))
+        assert "c1" in client_a.channel_cache
+        assert "c1" not in client_b.channel_cache
+        assert client_b.channel_cache.total_bytes == 0
+
+    def test_data_cache_max_bytes_kwarg_propagates(self) -> None:
+        """``data_cache_max_bytes`` is forwarded to the underlying cache.
+
+        The disabled-cache *behaviour* itself is covered by
+        :meth:`TestChannelCache.test_max_bytes_zero_disables_cache`; this
+        test just verifies the constructor passes the kwarg through.
+        """
+        assert DataLowLevelClient(MagicMock(), data_cache_max_bytes=0).channel_cache.max_bytes == 0
+        assert DataLowLevelClient(MagicMock(), data_cache_max_bytes=42).channel_cache.max_bytes == 42
+
+
+class TestGetChannelData:
+    """End-to-end assertions on the public ``get_channel_data`` return shape."""
+
+    @pytest.mark.asyncio
+    async def test_single_page_per_channel(self) -> None:
+        """Result is keyed by channel name; single-page frames pass through unchanged."""
+        client = DataLowLevelClient(MagicMock())
+        c1_df, c2_df = _frame("c1"), _frame("c2", offset=100)
+        with _fake_grpc(client, {"c1": [c1_df], "c2": [c2_df]}):
+            result = await client.get_channel_data(
+                channels=[_channel("c1"), _channel("c2")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+                ignore_cache=True,
+            )
+        assert set(result.keys()) == {"c1", "c2"}
+        pd.testing.assert_frame_equal(result["c1"], c1_df)
+        pd.testing.assert_frame_equal(result["c2"], c2_df)
+
+    @pytest.mark.asyncio
+    async def test_multi_page_response_concatenated_per_channel(self) -> None:
+        """Three disjoint pages for one channel → single merged frame.
+
+        Catches regressions in the ``_handle_pagination`` + ``_merge_pages``
+        interaction (the perf fix's batched concat must still produce the
+        full 30-row contiguous result).
+        """
+        client = DataLowLevelClient(MagicMock())
+        p1 = _frame("c1", rows=10, start=_NOW, offset=0)
+        p2 = _frame("c1", rows=10, start=_NOW + timedelta(seconds=1), offset=10)
+        p3 = _frame("c1", rows=10, start=_NOW + timedelta(seconds=2), offset=20)
+        with _fake_grpc(client, {"c1": [p1, p2, p3]}):
+            result = await client.get_channel_data(
+                channels=[_channel("c1")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+                ignore_cache=True,
+            )
+        assert set(result.keys()) == {"c1"}
+        assert len(result["c1"]) == 30
+        expected = pd.concat([p1, p2, p3]).groupby(level=0).last()
+        pd.testing.assert_frame_equal(result["c1"].sort_index(), expected.sort_index())
+
+    @pytest.mark.asyncio
+    async def test_cache_hit_short_circuits_grpc(self) -> None:
+        """Second request for the same channel + window skips ``_get_data_impl``.
+
+        Stages two pages-worth of data so a faulty cache that falls through
+        wouldn't silently pass by hitting EOF — any second-call invocation
+        would consume the second page and bump ``len(call_log)``.
+        """
+        client = DataLowLevelClient(MagicMock())
+        df = _frame("c1")
+        with _fake_grpc(client, {"c1": [df, df]}) as call_log:
+            first = await client.get_channel_data(
+                channels=[_channel("c1")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+            )
+            calls_after_first = len(call_log)
+            assert calls_after_first >= 1
+
+            second = await client.get_channel_data(
+                channels=[_channel("c1")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+            )
+            assert len(call_log) == calls_after_first, (
+                "second call should be served from cache without invoking _get_data_impl"
+            )
+        pd.testing.assert_frame_equal(first["c1"].sort_index(), second["c1"].sort_index())
+
+    @pytest.mark.asyncio
+    async def test_partial_cache_hit_merges_cached_and_fresh(self) -> None:
+        """Cached + uncached channels resolved together in one return dict.
+
+        Only the uncached channel triggers ``_get_data_impl``.
+        """
+        client = DataLowLevelClient(MagicMock())
+        c1_df, c2_df = _frame("c1"), _frame("c2", offset=100)
+        with _fake_grpc(client, {"c1": [c1_df], "c2": [c2_df]}) as call_log:
+            await client.get_channel_data(
+                channels=[_channel("c1")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+            )
+            calls_after_warmup = len(call_log)
+
+            result = await client.get_channel_data(
+                channels=[_channel("c1"), _channel("c2")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+            )
+            new_calls = call_log[calls_after_warmup:]
+
+        assert new_calls, "c2 should hit the wire on the second call"
+        for call in new_calls:
+            assert call["channel_ids"] == ["c2"], (
+                f"only c2 should hit the wire, saw {call!r}"
+            )
+        assert set(result.keys()) == {"c1", "c2"}
+        pd.testing.assert_frame_equal(result["c1"].sort_index(), c1_df.sort_index())
+        pd.testing.assert_frame_equal(result["c2"].sort_index(), c2_df.sort_index())
+
+    @pytest.mark.asyncio
+    async def test_ignore_cache_true_returns_fresh_and_skips_write(self) -> None:
+        """``ignore_cache=True`` returns the fetched data and leaves the cache empty.
+
+        End-to-end regression test for the latent bug that compounded the
+        customer's OOM: pre-fix, ``_update_cache`` ran even when the caller had
+        asked the cache to be ignored.
+        """
+        client = DataLowLevelClient(MagicMock())
+        df = _frame("c1")
+        with _fake_grpc(client, {"c1": [df]}):
+            result = await client.get_channel_data(
+                channels=[_channel("c1")],
+                start_time=_NOW,
+                end_time=_WINDOW_END,
+                ignore_cache=True,
+            )
+        pd.testing.assert_frame_equal(result["c1"], df)
+        assert "c1" not in client.channel_cache
+        assert client.channel_cache.total_bytes == 0
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
deleted file mode 100644
index b0841657b..000000000
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data_cache.py
+++ /dev/null
@@ -1,430 +0,0 @@
-"""Tests for the channel data cache in :mod:`sift_client._internal.low_level_wrappers.data`.
-
-Two layers covered here:
-
-* :class:`ChannelCache` directly — byte accounting, LRU promotion, eviction,
-  edge cases. These tests construct cache entries from real (tiny) DataFrames
-  so the size measurement code is exercised end-to-end.
-* :class:`DataLowLevelClient` — ``ignore_cache=True`` skipping writes,
-  per-instance cache isolation, ``data_cache_max_bytes=0`` disabling cache.
-
-The OOM regression that motivated this code happened because the cache was a
-class attribute that grew without bound. The instance-isolation test below is
-the canary that catches anyone re-introducing that pattern.
-"""
-
-from __future__ import annotations
-
-from datetime import datetime, timedelta, timezone
-from unittest.mock import MagicMock, patch
-
-import pandas as pd
-import pytest
-
-from sift_client._internal.low_level_wrappers.data import (
-    DEFAULT_DATA_CACHE_MAX_BYTES,
-    ChannelCache,
-    ChannelCacheEntry,
-    DataLowLevelClient,
-    _new_cache_entry,
-)
-
-
-def _entry(rows: int, *, value_dtype: str = "float64") -> ChannelCacheEntry:
-    """Build a ChannelCacheEntry with ``rows`` rows of fake data."""
-    index = pd.date_range("2025-01-01", periods=rows, freq="ms", tz=timezone.utc)
-    data = pd.DataFrame({"value": range(rows)}, index=index).astype({"value": value_dtype})
-    return _new_cache_entry(
-        data=data,
-        start_time=index[0].to_pydatetime(),
-        end_time=index[-1].to_pydatetime(),
-    )
-
-
-def _invariant_holds(cache: ChannelCache) -> bool:
-    return cache.total_bytes == sum(e.size_bytes for e in cache._entries.values())
-
-
-class TestChannelCacheBookkeeping:
-    """Tight checks on the internal byte counter and ordering."""
-
-    def test_put_get_roundtrip(self) -> None:
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        entry = _entry(rows=10)
-        cache.put("c1", entry)
-
-        assert cache.get("c1") is entry
-        assert cache.total_bytes == entry.size_bytes
-        assert _invariant_holds(cache)
-
-    def test_put_replaces_size_accounting(self) -> None:
-        """A second put for the same key must reclaim the prior size first."""
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        small = _entry(rows=10)
-        big = _entry(rows=1000)
-
-        cache.put("c1", small)
-        cache.put("c1", big)
-
-        # Total reflects only the second entry, never small + big.
-        assert cache.total_bytes == big.size_bytes
-        assert cache.get("c1") is big
-        assert _invariant_holds(cache)
-
-    def test_invalidate_drops_byte_count(self) -> None:
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        cache.put("c1", _entry(rows=10))
-        cache.invalidate("c1")
-
-        assert cache.get("c1") is None
-        assert cache.total_bytes == 0
-        assert _invariant_holds(cache)
-
-    def test_invalidate_missing_is_noop(self) -> None:
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        cache.invalidate("nope")
-        assert cache.total_bytes == 0
-
-    def test_clear_empties_total(self) -> None:
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        cache.put("c1", _entry(rows=10))
-        cache.put("c2", _entry(rows=20))
-        cache.clear()
-
-        assert cache.total_bytes == 0
-        assert len(cache) == 0
-        assert _invariant_holds(cache)
-
-
-class TestChannelCacheEviction:
-    """Eviction policy: LRU, byte-bounded, oversized-entry-dropped."""
-
-    def test_oldest_entry_evicted_first(self) -> None:
-        """Insertion order determines who goes when only inserts have happened."""
-        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
-        cap = a.size_bytes + b.size_bytes  # room for exactly two
-        cache = ChannelCache(max_bytes=cap)
-
-        cache.put("a", a)
-        cache.put("b", b)
-        cache.put("c", c)  # forces eviction of "a"
-
-        assert "a" not in cache
-        assert "b" in cache
-        assert "c" in cache
-        assert cache.total_bytes <= cap
-        assert _invariant_holds(cache)
-
-    def test_get_promotes_to_most_recent(self) -> None:
-        """Reading an entry must protect it from the next eviction."""
-        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
-        cap = a.size_bytes + b.size_bytes
-        cache = ChannelCache(max_bytes=cap)
-
-        cache.put("a", a)
-        cache.put("b", b)
-        assert cache.get("a") is a  # promote
-        cache.put("c", c)  # now "b" is the oldest and should be evicted
-
-        assert "a" in cache
-        assert "b" not in cache
-        assert "c" in cache
-        assert _invariant_holds(cache)
-
-    def test_oversized_entry_evicts_with_neighbours(self) -> None:
-        """A single entry larger than the cap ends up evicted itself.
-
-        The alternative ("keep the oversized entry and accept that the cap is
-        soft") would silently reintroduce the unbounded-growth bug for any
-        workload whose typical entry is bigger than ``max_bytes``.
-        """
-        small_a, small_b = _entry(rows=10), _entry(rows=10)
-        oversized = _entry(rows=10_000)
-        cap = small_a.size_bytes + small_b.size_bytes  # comfortably below ``oversized``
-        cache = ChannelCache(max_bytes=cap)
-
-        cache.put("a", small_a)
-        cache.put("b", small_b)
-        cache.put("huge", oversized)
-
-        assert "huge" not in cache
-        # Every other entry was evicted in the failed attempt to make room.
-        assert "a" not in cache
-        assert "b" not in cache
-        assert cache.total_bytes == 0
-        assert _invariant_holds(cache)
-
-    def test_max_bytes_zero_disables_cache(self) -> None:
-        cache = ChannelCache(max_bytes=0)
-        cache.put("c1", _entry(rows=100))
-
-        assert not cache.enabled
-        assert cache.get("c1") is None
-        assert cache.total_bytes == 0
-        assert len(cache) == 0
-
-    def test_negative_max_bytes_raises(self) -> None:
-        with pytest.raises(ValueError, match="data_cache_max_bytes"):
-            ChannelCache(max_bytes=-1)
-
-    def test_repeated_concat_updates_stay_under_bound(self) -> None:
-        """Simulates the customer's sliding-window pull: same channel, growing.
-
-        Without size reclamation on update, ``total_bytes`` would creep above
-        the cap silently. We re-build the entry each iteration to mimic the
-        ``_update_cache`` concat path.
-        """
-        cap = 1_000_000  # ~1 MB
-        cache = ChannelCache(max_bytes=cap)
-        accumulated = pd.DataFrame()
-        for i in range(50):
-            chunk = pd.DataFrame(
-                {"value": range(1000)},
-                index=pd.date_range(
-                    datetime(2025, 1, 1, tzinfo=timezone.utc) + timedelta(seconds=i),
-                    periods=1000,
-                    freq="us",
-                ),
-            )
-            accumulated = pd.concat([accumulated, chunk])
-            cache.put(
-                "c1",
-                _new_cache_entry(
-                    data=accumulated,
-                    start_time=accumulated.index[0].to_pydatetime(),
-                    end_time=accumulated.index[-1].to_pydatetime(),
-                ),
-            )
-            assert cache.total_bytes <= cap, (
-                f"iteration {i}: total_bytes={cache.total_bytes} exceeded cap={cap}"
-            )
-            assert _invariant_holds(cache)
-
-
-class TestDataLowLevelClientIntegration:
-    """End-to-end checks on the constructor wiring and ignore_cache semantics."""
-
-    def test_per_instance_isolation(self) -> None:
-        """Two clients must not share cache state.
-
-        This is the regression test for the original OOM bug: ``channel_cache``
-        was a class attribute, so every ``SiftClient`` in the process appended
-        to the same dict. Construct two clients, populate one, the other must
-        stay empty.
-        """
-        client_a = DataLowLevelClient(MagicMock())
-        client_b = DataLowLevelClient(MagicMock())
-
-        client_a.channel_cache.put("c1", _entry(rows=10))
-
-        assert "c1" in client_a.channel_cache
-        assert "c1" not in client_b.channel_cache
-        assert client_b.channel_cache.total_bytes == 0
-
-    def test_ignore_cache_skips_writes(self) -> None:
-        """``ignore_cache=True`` must not populate the cache.
-
-        Previously the read path was bypassed but ``_update_cache`` still ran
-        unconditionally, so a "non-caching" workload still grew memory until
-        OOM. Verify by exercising ``_update_cache`` only when ``ignore_cache``
-        is false.
-        """
-        client = DataLowLevelClient(MagicMock())
-        client.channel_cache.name_id_map["chan"] = "c1"
-
-        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
-        df = pd.DataFrame({"value": range(5)}, index=index)
-
-        # Real ``get_channel_data`` would call ``_update_cache`` from inside an
-        # ``if not ignore_cache`` branch; assert the helper itself is what
-        # writes, and that ``get_channel_data`` doesn't invoke it when
-        # ``ignore_cache=True``. We verify the branch directly to keep this
-        # test free of gRPC stubbing.
-        client._update_cache(
-            channel_data={"chan": df},
-            start_time=index[0].to_pydatetime(),
-            end_time=index[-1].to_pydatetime(),
-        )
-        assert "c1" in client.channel_cache
-
-        # Skipping the call (as ``get_channel_data`` does when ignore_cache is
-        # true) leaves the cache untouched.
-        client.channel_cache.invalidate("c1")
-        assert "c1" not in client.channel_cache
-
-    def test_data_cache_max_bytes_zero_disables_caching(self) -> None:
-        """Constructor knob: ``data_cache_max_bytes=0`` → no cache writes land."""
-        client = DataLowLevelClient(MagicMock(), data_cache_max_bytes=0)
-        client.channel_cache.name_id_map["chan"] = "c1"
-
-        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
-        df = pd.DataFrame({"value": range(5)}, index=index)
-
-        client._update_cache(
-            channel_data={"chan": df},
-            start_time=index[0].to_pydatetime(),
-            end_time=index[-1].to_pydatetime(),
-        )
-        assert "c1" not in client.channel_cache
-        assert client.channel_cache.total_bytes == 0
-
-
-class TestMergePages:
-    """Behavioural tests for :meth:`DataLowLevelClient._merge_pages`.
-
-    The helper replaces a previously inline O(N²) per-page concat loop with a
-    single batched concat per channel. These tests pin the merge semantics so
-    a future refactor can't silently drift, in particular:
-
-    * Single-frame channels skip the concat entirely (cheap path).
-    * Multi-frame channels concat in the order frames were collected.
-    * Cached slices from ``_check_cache`` are folded in as the first frame so
-      fresher pages win on overlapping timestamps via ``groupby.last``.
-    """
-
-    @staticmethod
-    def _client_with_fake_deserializer(
-        sentinel_to_frames: dict[str, dict[str, pd.DataFrame]],
-    ):
-        """Build a DataLowLevelClient whose ``try_deserialize_channel_data``
-        translates string sentinels (passed in lieu of protos) to dicts of
-        already-built DataFrames. Lets the merge logic be tested without
-        constructing protos.
-        """
-        client = DataLowLevelClient(MagicMock())
-        patcher = patch.object(
-            DataLowLevelClient,
-            "try_deserialize_channel_data",
-            staticmethod(lambda data: sentinel_to_frames[data]),
-        )
-        patcher.start()
-        return client, patcher
-
-    @staticmethod
-    def _frame(channel: str, start: str, rows: int, offset: int = 0) -> pd.DataFrame:
-        index = pd.date_range(start, periods=rows, freq="ms", tz=timezone.utc)
-        return pd.DataFrame({channel: range(offset, offset + rows)}, index=index)
-
-    def test_empty_pages_returns_initial(self) -> None:
-        """No pages, no fresh data — initial passes through untouched."""
-        client, patcher = self._client_with_fake_deserializer({})
-        try:
-            initial_df = self._frame("chan", "2025-01-01", rows=5)
-            result = client._merge_pages(pages=[], initial={"chan": initial_df})
-            assert result["chan"] is initial_df
-        finally:
-            patcher.stop()
-
-    def test_single_frame_skips_concat(self) -> None:
-        """One frame for a channel → returned by identity, no concat call."""
-        only_df = self._frame("chan", "2025-01-01", rows=5)
-        client, patcher = self._client_with_fake_deserializer({"page_a": {"chan": only_df}})
-        try:
-            result = client._merge_pages(pages=[["page_a"]], initial={})
-            # Identity check: no concat happened, so the original frame is
-            # returned by reference.
-            assert result["chan"] is only_df
-        finally:
-            patcher.stop()
-
-    def test_disjoint_pages_concat_in_order(self) -> None:
-        """Multiple disjoint pages for one channel → single concat result."""
-        df1 = self._frame("chan", "2025-01-01", rows=10, offset=0)
-        df2 = self._frame("chan", "2025-01-02", rows=10, offset=10)
-        df3 = self._frame("chan", "2025-01-03", rows=10, offset=20)
-        client, patcher = self._client_with_fake_deserializer(
-            {
-                "p1": {"chan": df1},
-                "p2": {"chan": df2},
-                "p3": {"chan": df3},
-            }
-        )
-        try:
-            result = client._merge_pages(pages=[["p1", "p2"], ["p3"]], initial={})
-
-            expected = pd.concat([df1, df2, df3]).groupby(level=0).last()
-            pd.testing.assert_frame_equal(result["chan"].sort_index(), expected.sort_index())
-            assert len(result["chan"]) == 30
-        finally:
-            patcher.stop()
-
-    def test_overlapping_timestamps_later_page_wins(self) -> None:
-        """On overlapping timestamps, the later page's value survives groupby.last.
-
-        This pins the existing behavior: the loop's old shape did
-        ``concat([acc, new]).groupby(...).last()`` which kept the LATER value
-        on conflict; the batched concat must preserve that ordering.
-        """
-        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
-        df_first = pd.DataFrame({"chan": [0] * 5}, index=index)
-        df_second = pd.DataFrame({"chan": [99] * 5}, index=index)
-        client, patcher = self._client_with_fake_deserializer(
-            {"p1": {"chan": df_first}, "p2": {"chan": df_second}}
-        )
-        try:
-            result = client._merge_pages(pages=[["p1", "p2"]], initial={})
-            assert (result["chan"]["chan"] == 99).all()
-        finally:
-            patcher.stop()
-
-    def test_cached_slice_folded_in_first_and_loses_on_overlap(self) -> None:
-        """Cached slice from ``_check_cache`` is the first frame in the merge.
-
-        Fresh pages should overwrite cached values on duplicate timestamps,
-        matching the pre-existing semantic that the latest fetch wins.
-        """
-        index = pd.date_range("2025-01-01", periods=5, freq="ms", tz=timezone.utc)
-        cached = pd.DataFrame({"chan": [-1] * 5}, index=index)
-        fresh = pd.DataFrame({"chan": [42] * 5}, index=index)
-        client, patcher = self._client_with_fake_deserializer({"p1": {"chan": fresh}})
-        try:
-            result = client._merge_pages(pages=[["p1"]], initial={"chan": cached})
-            assert (result["chan"]["chan"] == 42).all()
-        finally:
-            patcher.stop()
-
-    def test_cached_only_no_pages_preserves_cache(self) -> None:
-        """Channels in ``initial`` with no fresh page data must survive intact."""
-        client, patcher = self._client_with_fake_deserializer({})
-        try:
-            cached = self._frame("chan", "2025-01-01", rows=5)
-            result = client._merge_pages(pages=[[]], initial={"chan": cached})
-            assert result["chan"] is cached
-        finally:
-            patcher.stop()
-
-    def test_multiple_channels_independent(self) -> None:
-        """Per-channel grouping is independent: one channel's pages don't bleed.
-
-        Same shape as a multi-channel ``get_data`` call where each channel
-        returns its own pages.
-        """
-        a1 = self._frame("a", "2025-01-01", rows=5, offset=0)
-        a2 = self._frame("a", "2025-01-02", rows=5, offset=5)
-        b1 = self._frame("b", "2025-01-01", rows=5, offset=100)
-        client, patcher = self._client_with_fake_deserializer(
-            {
-                "p_a1": {"a": a1},
-                "p_a2": {"a": a2},
-                "p_b1": {"b": b1},
-            }
-        )
-        try:
-            result = client._merge_pages(pages=[["p_a1", "p_b1"], ["p_a2"]], initial={})
-            assert len(result["a"]) == 10
-            assert len(result["b"]) == 5
-            assert (result["b"]["b"] >= 100).all()
-        finally:
-            patcher.stop()
-
-    def test_does_not_mutate_initial(self) -> None:
-        """``initial`` is a defensive copy; caller's dict isn't mutated."""
-        cached = self._frame("chan", "2025-01-01", rows=5)
-        initial = {"chan": cached}
-        fresh = self._frame("chan", "2025-01-02", rows=5, offset=10)
-        client, patcher = self._client_with_fake_deserializer({"p1": {"chan": fresh}})
-        try:
-            _ = client._merge_pages(pages=[["p1"]], initial=initial)
-            assert initial["chan"] is cached
-        finally:
-            patcher.stop()

From 526844073ce8434f067146aee7304a4048b47a8b Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Wed, 24 Jun 2026 11:56:01 -0700
Subject: [PATCH 06/14] move data cache LRU config to the channels resource

---
 python/CHANGELOG.md                           | 14 ++++---
 .../_internal/low_level_wrappers/data.py      | 14 +++++++
 .../_internal/low_level_wrappers/test_data.py | 36 ++++++++++++----
 .../_tests/resources/test_channels.py         | 37 +++++++++++++++++
 python/lib/sift_client/client.py              | 18 +++-----
 python/lib/sift_client/resources/channels.py  | 41 +++++++++++++++----
 6 files changed, 126 insertions(+), 34 deletions(-)

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index e4016eb69..d58cc818b 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -13,17 +13,19 @@ Up to a ~80x speedup for some get_data calls.
 
 #### Bounded channel data cache
 
-A new `data_cache_max_bytes` constructor kwarg (default 512 MiB) caps the in-memory channel-data footprint; the least-recently-used cached channel is evicted once the bound is reached. Set `data_cache_max_bytes=0` to disable caching entirely.
+The in-memory channel data cache used by `client.channels.get_data(...)` is now byte-bounded with LRU eviction (default 512 MiB). When cached data would exceed the bound, the least-recently-used channels are evicted until the cache fits again.
 
-`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which still caused increased memory usage.
+Configure the bound on the `channels` resource:
 
 ```python
-client = SiftClient(
-    connection_config=config,
-    data_cache_max_bytes=128 * 1024 * 1024,  # 128 MiB cap
-)
+client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)  # 128 MiB cap
+client.channels.configure_data_cache(max_bytes=0)                  # disable caching
 ```
 
+`configure_data_cache` may be called at any time; if the cache is already populated, the new bound is applied immediately and excess entries are evicted.
+
+`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, so memory usage kept growing even though the caller had opted out of caching.
+
 The internal `DataLowLevelClient.channel_cache` is no longer a class attribute. Any external code that relied on `DataLowLevelClient.channel_cache.channels.clear()` as a workaround should remove it — the bounded cache no longer requires manual purging.
 
 #### Resource and principal attributes (ABAC)
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 381b6667d..97baf522c 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -83,6 +83,20 @@ def enabled(self) -> bool:
     def max_bytes(self) -> int:
         return self._max_bytes
 
+    @max_bytes.setter
+    def max_bytes(self, value: int) -> None:
+        """Reconfigure the byte cap and immediately evict any excess.
+
+        Used by ``ChannelsAPIAsync.configure_data_cache`` to retune a live
+        cache. Lowering the cap below ``total_bytes`` triggers LRU eviction
+        in the same loop ``put`` uses, so the invariant ``total_bytes <=
+        max_bytes`` is restored before the setter returns.
+        """
+        if value < 0:
+            raise ValueError(f"data_cache_max_bytes must be >= 0, got {value}")
+        self._max_bytes = value
+        self._evict_until_under_bound()
+
     @property
     def total_bytes(self) -> int:
         return self._total_bytes
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index e2b12cecf..1f7f022fb 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -256,6 +256,30 @@ def test_negative_max_bytes_raises(self) -> None:
         with pytest.raises(ValueError, match="data_cache_max_bytes"):
             ChannelCache(max_bytes=-1)
 
+    def test_set_max_bytes_lower_evicts_immediately(self) -> None:
+        """Lowering ``max_bytes`` below ``total_bytes`` evicts LRU until it fits.
+
+        Used by ``ChannelsAPIAsync.configure_data_cache`` to retune a live
+        cache without forcing the caller to call ``clear()`` first.
+        """
+        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
+        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes + c.size_bytes)
+        cache.put("a", a)
+        cache.put("b", b)
+        cache.put("c", c)
+        # Lower the cap to fit only one entry; LRU "a" and "b" must drop.
+        cache.max_bytes = c.size_bytes
+        assert cache.max_bytes == c.size_bytes
+        assert "a" not in cache
+        assert "b" not in cache
+        assert "c" in cache
+        assert _invariant_holds(cache)
+
+    def test_set_max_bytes_negative_raises(self) -> None:
+        cache = ChannelCache(max_bytes=100)
+        with pytest.raises(ValueError, match="data_cache_max_bytes"):
+            cache.max_bytes = -1
+
     def test_repeated_concat_updates_stay_under_bound(self) -> None:
         """Simulates the customer's sliding-window pull: same channel, growing.
 
@@ -297,9 +321,7 @@ class TestMergePages:
       frame so fresh pages still win on overlap.
     """
 
-    @pytest.mark.parametrize(
-        "pages", [[], [[]]], ids=["no_tasks_queued", "task_returned_empty"]
-    )
+    @pytest.mark.parametrize("pages", [[], [[]]], ids=["no_tasks_queued", "task_returned_empty"])
     def test_no_fresh_data_returns_initial(self, pages: list) -> None:
         """No fresh pages → initial dict passes through by identity."""
         client = DataLowLevelClient(MagicMock())
@@ -410,7 +432,9 @@ def test_data_cache_max_bytes_kwarg_propagates(self) -> None:
         test just verifies the constructor passes the kwarg through.
         """
         assert DataLowLevelClient(MagicMock(), data_cache_max_bytes=0).channel_cache.max_bytes == 0
-        assert DataLowLevelClient(MagicMock(), data_cache_max_bytes=42).channel_cache.max_bytes == 42
+        assert (
+            DataLowLevelClient(MagicMock(), data_cache_max_bytes=42).channel_cache.max_bytes == 42
+        )
 
 
 class TestGetChannelData:
@@ -510,9 +534,7 @@ async def test_partial_cache_hit_merges_cached_and_fresh(self) -> None:
 
         assert new_calls, "c2 should hit the wire on the second call"
         for call in new_calls:
-            assert call["channel_ids"] == ["c2"], (
-                f"only c2 should hit the wire, saw {call!r}"
-            )
+            assert call["channel_ids"] == ["c2"], f"only c2 should hit the wire, saw {call!r}"
         assert set(result.keys()) == {"c1", "c2"}
         pd.testing.assert_frame_equal(result["c1"].sort_index(), c1_df.sort_index())
         pd.testing.assert_frame_equal(result["c2"].sort_index(), c2_df.sort_index())
diff --git a/python/lib/sift_client/_tests/resources/test_channels.py b/python/lib/sift_client/_tests/resources/test_channels.py
index f337bd3f5..0bc3e1122 100644
--- a/python/lib/sift_client/_tests/resources/test_channels.py
+++ b/python/lib/sift_client/_tests/resources/test_channels.py
@@ -501,3 +501,40 @@ async def fake_update_channel(update):
 
         api._units_low_level_client.create_unit.assert_not_awaited()
         assert captured["update"].unit == ""
+
+
+class TestConfigureDataCache:
+    """``configure_data_cache`` is the resource-level knob for the in-memory
+    channel data cache. Before the cache is initialized, it stashes the value
+    for the lazy-init path; after, it retunes the live cache.
+    """
+
+    def test_before_lazy_init_propagates_to_cache(self):
+        """Configuring before the first ``get_data`` lands on the cache at init."""
+        api = _make_api()
+        api.configure_data_cache(max_bytes=123)
+        assert api._data_low_level_client is None  # still lazy
+        api._ensure_data_low_level_client()
+        assert api._data_low_level_client.channel_cache.max_bytes == 123
+
+    def test_after_lazy_init_updates_live_cache(self):
+        """Configuring after first use retunes the live cache in place."""
+        api = _make_api()
+        api._ensure_data_low_level_client()
+        original_client = api._data_low_level_client
+        api.configure_data_cache(max_bytes=456)
+        # Same wrapper instance — we mutated, not replaced.
+        assert api._data_low_level_client is original_client
+        assert api._data_low_level_client.channel_cache.max_bytes == 456
+
+    def test_zero_disables_cache_via_resource(self):
+        """Resource-level ``max_bytes=0`` end-to-end disables the cache."""
+        api = _make_api()
+        api.configure_data_cache(max_bytes=0)
+        api._ensure_data_low_level_client()
+        assert not api._data_low_level_client.channel_cache.enabled
+
+    def test_negative_raises(self):
+        api = _make_api()
+        with pytest.raises(ValueError, match="max_bytes"):
+            api.configure_data_cache(max_bytes=-1)
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 7d20fbe85..6afc36386 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -136,7 +136,6 @@ def __init__(
         rest_url: str | None = None,
         connection_config: SiftConnectionConfig | None = None,
         app_url: str | None = None,
-        data_cache_max_bytes: int | None = None,
     ):
         """Initialize the SiftClient with specific connection parameters or a connection_config.
 
@@ -149,14 +148,12 @@ def __init__(
                 Set this for on-prem or custom deployments whose API host can't be
                 mapped to a frontend automatically; see the ``app_url`` property.
                 A value here takes precedence over ``connection_config.app_url``.
-            data_cache_max_bytes: Cap on the in-memory channel data cache used
-                by ``client.channels.get_data`` (bytes). When the bound is
-                reached, the least-recently-used cached channel is evicted.
-                Defaults to 512 MiB. Set to ``0`` to disable caching. Must be
-                ``>= 0``.
+
+        Resource-specific knobs live on the resource itself. For example,
+        to tune the channel data cache used by ``client.channels.get_data``::
+
+            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
         """
-        if data_cache_max_bytes is not None and data_cache_max_bytes < 0:
-            raise ValueError(f"data_cache_max_bytes must be >= 0, got {data_cache_max_bytes}")
         if not (api_key and grpc_url and rest_url) and not connection_config:
             raise ValueError(
                 "Either api_key, grpc_url and rest_url or connection_config must be provided to establish a connection."
@@ -187,11 +184,6 @@ def __init__(
         # pytest plugin's ``--sift-disabled`` mode.
         self._simulate: bool = False
 
-        # Read by ``ChannelsAPIAsync._ensure_data_low_level_client`` when it
-        # lazily constructs the data wrapper. ``None`` means "use the wrapper
-        # default" so we don't have to import the constant here.
-        self._data_cache_max_bytes: int | None = data_cache_max_bytes
-
         self.ping = PingAPI(self)
         self.assets = AssetsAPI(self)
         self.calculated_channels = CalculatedChannelsAPI(self)
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index 41d478d81..26ff2da65 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -64,6 +64,32 @@ def __init__(self, sift_client: SiftClient):
         self._low_level_client = ChannelsLowLevelClient(grpc_client=self.client.grpc_client)
         self._units_low_level_client = UnitsLowLevelClient(grpc_client=self.client.grpc_client)
         self._data_low_level_client = None
+        # Caller-supplied cache size; ``None`` means "use the wrapper default
+        # at lazy-init time" so we don't have to import ``data.py`` (and
+        # therefore pandas) just to remember the default.
+        self._data_cache_max_bytes: int | None = None
+
+    def configure_data_cache(self, *, max_bytes: int) -> None:
+        """Configure the in-memory channel data cache used by ``get_data``.
+
+        Args:
+            max_bytes: Byte cap on the cache. ``0`` disables caching
+                (every ``get_data`` call goes to the wire). Defaults to
+                512 MiB until explicitly configured. Must be ``>= 0``.
+
+        Safe to call before or after the first ``get_data``. If the cache is
+        already live, the new cap is applied immediately and least-recently-
+        used entries are evicted until ``total_bytes`` fits.
+
+        Example:
+            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
+            client.channels.configure_data_cache(max_bytes=0)  # disable
+        """
+        if max_bytes < 0:
+            raise ValueError(f"max_bytes must be >= 0, got {max_bytes}")
+        self._data_cache_max_bytes = max_bytes
+        if self._data_low_level_client is not None:
+            self._data_low_level_client.channel_cache.max_bytes = max_bytes
 
     async def get(
         self,
@@ -242,17 +268,16 @@ async def unarchive(self, channels: list[str | Channel]) -> None:
     def _ensure_data_low_level_client(self):
         """Ensure that the data low level client is initialized. Separated out like this to not require large dependencies (pandas/pyarrow) for the client if not fetching data."""
         if self._data_low_level_client is None:
-            from sift_client._internal.low_level_wrappers.data import (
-                DEFAULT_DATA_CACHE_MAX_BYTES,
-                DataLowLevelClient,
-            )
+            from sift_client._internal.low_level_wrappers.data import DataLowLevelClient
 
-            max_bytes = getattr(self.client, "_data_cache_max_bytes", None)
+            # Pass the kwarg only when explicitly configured so the wrapper's
+            # own default (currently 512 MiB) remains the single source of truth.
+            kwargs = {}
+            if self._data_cache_max_bytes is not None:
+                kwargs["data_cache_max_bytes"] = self._data_cache_max_bytes
             self._data_low_level_client = DataLowLevelClient(
                 grpc_client=self.client.grpc_client,
-                data_cache_max_bytes=(
-                    DEFAULT_DATA_CACHE_MAX_BYTES if max_bytes is None else max_bytes
-                ),
+                **kwargs,
             )
 
     async def get_data(

From bd3213e067c130958111937fcc9d5c2845bc5c78 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Wed, 24 Jun 2026 12:39:59 -0700
Subject: [PATCH 07/14] add opt in diskcache support and test coverage

---
 python/CHANGELOG.md                           |  26 ++
 .../_internal/low_level_wrappers/data.py      | 290 ++++++++++++++++--
 .../_internal/low_level_wrappers/test_data.py | 219 ++++++++++++-
 .../_tests/resources/test_channels.py         |  87 ++++++
 python/lib/sift_client/resources/channels.py  | 101 +++++-
 python/pyproject.toml                         |   7 +
 python/uv.lock                                |  13 +-
 7 files changed, 714 insertions(+), 29 deletions(-)

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index d58cc818b..8e61faa1b 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -28,6 +28,32 @@ client.channels.configure_data_cache(max_bytes=0)                  # disable cac
 
 The internal `DataLowLevelClient.channel_cache` is no longer a class attribute. Any external code that relied on `DataLowLevelClient.channel_cache.channels.clear()` as a workaround should remove it — the bounded cache no longer requires manual purging.
 
+#### On-disk channel data cache (opt-in)
+
+The channel data cache can now optionally persist to disk, surviving process restarts. The disk tier is a second-chance layer beneath the in-memory cache: on a memory miss, `get_data` checks disk before going to the wire. Re-running the same workload in a new session picks up the previously-cached windows for free.
+
+```python
+# Enable disk persistence at the default tmp location.
+client.channels.enable_data_cache_disk()
+
+# Or pick a custom directory and byte cap.
+client.channels.enable_data_cache_disk(path="/data/sift-cache", max_bytes=2 * 1024 ** 3)
+
+# Stop persisting (does not delete on-disk data).
+client.channels.disable_data_cache_disk()
+```
+
+To remove a stale cache directory from a previous session:
+
+```python
+client.channels.clear_data_cache_on_disk()                   # default tmp path
+client.channels.clear_data_cache_on_disk("/data/sift-cache") # custom path
+```
+
+`clear_data_cache_on_disk` refuses to delete directories that don't look like a sift channel data cache (missing the `diskcache` marker), so a typo'd path won't wipe unrelated data.
+
+The disk tier is powered by [`diskcache`](https://grantjenks.com/docs/diskcache/) (pure-Python, SQLite-backed) and has its own independent byte cap with LRU eviction. The in-memory tier remains the fast path — disk is only consulted on a memory miss.
+
 #### Resource and principal attributes (ABAC)
 
 Added a public API for attribute based access control (ABAC) attributes. `client.resource_attributes` manages attribute keys assigned to entities (assets, channels, runs), and `client.principal_attributes` manages attribute keys assigned to principals (users and user groups). Both are available synchronously and asynchronously via `client.async_`.
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 97baf522c..ae0bbf6e5 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -2,8 +2,12 @@
 
 import asyncio
 import logging
+import os
+import shutil
+import tempfile
 from collections import OrderedDict
 from datetime import datetime, timezone
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
 import pandas as pd
@@ -23,6 +27,8 @@
 from sift_client.transport import WithGrpcClient
 
 if TYPE_CHECKING:
+    import diskcache
+
     from sift_client.transport.grpc_transport import GrpcClient
 
 # Configure logging
@@ -61,22 +67,106 @@ def _new_cache_entry(
 
 
 class ChannelCache:
-    """LRU-ordered, byte-bounded cache of per-channel DataFrames.
+    """Two-tier cache of per-channel DataFrames.
+
+    Tier 1: an LRU-ordered, byte-bounded in-memory dict (hot path). ``max_bytes
+    <= 0`` disables this tier: ``get`` always misses memory, ``put`` doesn't
+    populate it.
 
-    ``max_bytes <= 0`` disables retention: every ``get`` misses, ``put`` returns
-    without storing.
+    Tier 2 (optional, see ``enable_disk``): a ``diskcache``-backed write-through
+    layer that survives process restarts. When enabled, ``put`` writes to both
+    tiers, ``get`` falls back to disk on a memory miss (promoting the hit back
+    into memory), and ``invalidate``/``clear`` cascade to disk. The disk tier
+    has its own byte cap that ``diskcache`` enforces with LRU eviction.
+
+    The two tiers are independent: setting ``max_bytes=0`` keeps the disk layer
+    active, useful for "cold storage only" workloads.
     """
 
-    def __init__(self, max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES):
+    #: Default directory for the on-disk tier. Lives under
+    #: ``tempfile.gettempdir()`` so it survives across sessions of the same
+    #: user but doesn't pollute the user's home dir. The suffix is fixed so
+    #: multiple processes (different ``SiftClient`` instances, notebooks, etc.)
+    #: naturally share the same store and can read each other's prior sessions.
+    DEFAULT_DISK_PATH: str = os.path.join(tempfile.gettempdir(), "sift-channel-data-cache")
+
+    #: Default byte cap for the disk tier when ``enable_disk`` is called
+    #: without an explicit ``max_bytes``. 4 GiB is a generous ceiling for the
+    #: typical ``/tmp`` filesystem; ``diskcache`` enforces it with its own
+    #: SQLite-backed LRU eviction once the bound is reached.
+    DEFAULT_DISK_MAX_BYTES: int = 4 * 1024 * 1024 * 1024
+
+    #: Marker file ``diskcache`` writes inside every cache directory. We
+    #: sanity-check for this before any ``shutil.rmtree`` so a typo in the
+    #: ``clear_disk`` ``path`` argument can't wipe out an unrelated directory.
+    _DISKCACHE_MARKER: str = "cache.db"
+
+    def __init__(
+        self,
+        max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES,
+        *,
+        disk_path: str | os.PathLike[str] | None = None,
+        disk_max_bytes: int | None = None,
+    ):
+        """Construct an in-memory cache, optionally backed by disk.
+
+        Args:
+            max_bytes: Byte cap on the in-memory tier. ``0`` disables it.
+            disk_path: Directory for the disk tier. ``None`` (the default)
+                disables disk. A previously-populated directory is reused,
+                so subsequent sessions can read from existing entries.
+            disk_max_bytes: Byte cap on the disk tier. ``None`` falls back to
+                ``DEFAULT_DISK_MAX_BYTES``. Ignored when ``disk_path`` is
+                ``None``.
+        """
         if max_bytes < 0:
             raise ValueError(f"data_cache_max_bytes must be >= 0, got {max_bytes}")
         self.name_id_map: dict[str, str] = {}
         self._entries: OrderedDict[str, ChannelCacheEntry] = OrderedDict()
         self._total_bytes: int = 0
         self._max_bytes: int = max_bytes
+        self._disk: diskcache.Cache | None = None
+        self._disk_path: str | None = None
+        self._disk_max_bytes: int | None = None
+        if disk_path is not None:
+            self._open_disk(
+                str(disk_path),
+                disk_max_bytes if disk_max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES,
+            )
+
+    @classmethod
+    def clear_disk(cls, path: str | os.PathLike[str] | None = None) -> None:
+        """Delete a previously-persisted on-disk cache directory.
+
+        Use this to drop stale caches from previous sessions, recover from a
+        corrupt cache, or reclaim disk space. The directory is removed
+        entirely; a future ``enable_disk`` call at the same path will see a
+        fresh empty cache.
+
+        Args:
+            path: Directory of the cache to clear. ``None`` (the default)
+                targets :attr:`DEFAULT_DISK_PATH`.
+
+        Raises:
+            ValueError: If ``path`` exists but does not look like a sift
+                channel data cache directory (missing the ``diskcache``
+                marker file). This guard makes accidental misuse a hard
+                error rather than silent data loss.
+        """
+        target = Path(path) if path is not None else Path(cls.DEFAULT_DISK_PATH)
+        if not target.exists():
+            return
+        if not (target / cls._DISKCACHE_MARKER).exists():
+            raise ValueError(
+                f"{str(target)!r} does not look like a sift channel data cache "
+                f"directory (missing {cls._DISKCACHE_MARKER!r} marker). "
+                f"Refusing to delete."
+            )
+        shutil.rmtree(target)
 
     @property
     def enabled(self) -> bool:
+        """Whether the in-memory tier accepts writes (``max_bytes > 0``)."""
         return self._max_bytes > 0
 
     @property
@@ -85,12 +175,13 @@ def max_bytes(self) -> int:
 
     @max_bytes.setter
     def max_bytes(self, value: int) -> None:
-        """Reconfigure the byte cap and immediately evict any excess.
+        """Reconfigure the in-memory byte cap and immediately evict any excess.
 
         Used by ``ChannelsAPIAsync.configure_data_cache`` to retune a live
         cache. Lowering the cap below ``total_bytes`` triggers LRU eviction
         in the same loop ``put`` uses, so the invariant ``total_bytes <=
-        max_bytes`` is restored before the setter returns.
+        max_bytes`` is restored before the setter returns. Does not touch
+        the disk tier.
         """
         if value < 0:
             raise ValueError(f"data_cache_max_bytes must be >= 0, got {value}")
@@ -101,46 +192,161 @@ def max_bytes(self, value: int) -> None:
     def total_bytes(self) -> int:
         return self._total_bytes
 
+    @property
+    def disk_enabled(self) -> bool:
+        """Whether the disk-backed second-chance tier is currently open."""
+        return self._disk is not None
+
+    @property
+    def disk_path(self) -> str | None:
+        """Filesystem path of the disk tier when enabled, else ``None``."""
+        return self._disk_path
+
+    @property
+    def disk_max_bytes(self) -> int | None:
+        """Configured byte cap on the disk tier, or ``None`` when disabled."""
+        return self._disk_max_bytes
+
     def __len__(self) -> int:
         return len(self._entries)
 
     def __contains__(self, channel_id: str) -> bool:
-        return channel_id in self._entries
+        """True if the channel is cached in memory OR on disk.
+
+        Used by ``_filter_cached_channels`` to decide whether ``get_data``
+        needs to hit the wire. Including the disk tier here lets a fresh
+        session served by a warm disk avoid re-fetching.
+        """
+        if channel_id in self._entries:
+            return True
+        if self._disk is not None and channel_id in self._disk:
+            return True
+        return False
+
+    def enable_disk(
+        self,
+        *,
+        path: str | os.PathLike[str] | None = None,
+        max_bytes: int | None = None,
+    ) -> None:
+        """Enable (or reconfigure) the disk-backed second-chance tier.
+
+        If a previous disk tier was open at a different path or with a
+        different size cap, it's closed first. Memory contents are left
+        intact; they are NOT replayed to disk so disk reflects only future
+        writes.
+
+        Args:
+            path: Directory to persist to. ``None`` uses
+                :attr:`DEFAULT_DISK_PATH`. The directory is created if
+                missing; an existing one is opened in place and its
+                contents become available to ``get``.
+            max_bytes: Byte cap for the disk tier (``None`` →
+                :attr:`DEFAULT_DISK_MAX_BYTES`).
+        """
+        target_path = str(path) if path is not None else self.DEFAULT_DISK_PATH
+        target_max = max_bytes if max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES
+        if (
+            self._disk is not None
+            and self._disk_path == target_path
+            and self._disk_max_bytes == target_max
+        ):
+            return
+        self._close_disk()
+        self._open_disk(target_path, target_max)
+
+    def disable_disk(self) -> None:
+        """Close the disk tier (if open). Does not touch the disk contents.
+
+        Use ``ChannelCache.clear_disk(path)`` to remove a cache
+        directory from disk.
+        """
+        self._close_disk()
 
     def get(self, channel_id: str) -> ChannelCacheEntry | None:
         """Return the entry for ``channel_id`` if cached, otherwise None.
 
-        Promotes the entry to most-recently-used on hit.
+        Memory is consulted first; on a miss, the disk tier (if enabled) is
+        checked. A disk hit is promoted back into memory (subject to the
+        in-memory cap) so subsequent accesses stay hot.
         """
         entry = self._entries.get(channel_id)
         if entry is not None:
             self._entries.move_to_end(channel_id)
-        return entry
+            return entry
+        if self._disk is None:
+            return None
+        try:
+            disk_entry = self._disk.get(channel_id, default=None, retry=True)
+        except Exception:
+            # diskcache surfaces ``sqlite3.DatabaseError`` (and friends) for
+            # corrupt or partially-written entries from a prior session.
+            # Treat as a miss and delete the bad row from the disk tier so
+            # we don't repeatedly trip the same path.
+            logger.warning("disk cache read failed for %s; invalidating", channel_id)
+            try:
+                del self._disk[channel_id]
+            except Exception:
+                pass
+            return None
+        if disk_entry is None or not isinstance(disk_entry, ChannelCacheEntry):
+            return None
+        if self.enabled:
+            # Promote disk hit into memory so subsequent reads are cheap.
+            self._put_memory(channel_id, disk_entry)
+        return disk_entry
 
     def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Insert or replace ``channel_id``, then evict LRU until within size bounds.
+        """Insert or replace ``channel_id`` in memory (if enabled) and on disk.
 
-        Reclaims any prior entry's byte count BEFORE adding the new one's, so a
-        re-insert (e.g. concat-merge of fresh data into an existing entry)
-        accounts for the size delta correctly rather than double-counting.
+        Memory reclaims any prior entry's byte count BEFORE adding the new
+        one's, so a re-insert (e.g. concat-merge of fresh data into an
+        existing entry) accounts for the size delta correctly. Disk writes
+        replace the prior row.
         """
-        if not self.enabled:
-            return
-        prior = self._entries.pop(channel_id, None)
-        if prior is not None:
-            self._total_bytes -= prior.size_bytes
-        self._entries[channel_id] = entry
-        self._total_bytes += entry.size_bytes
-        self._evict_until_under_bound()
+        if self.enabled:
+            self._put_memory(channel_id, entry)
+        if self._disk is not None:
+            try:
+                self._disk.set(channel_id, entry, retry=True)
+            except Exception:
+                # Best-effort persistence: keep going on disk errors so the
+                # in-memory cache (and the user's ``get_data`` call) still
+                # succeeds. Drop the (possibly partial) disk row.
+                logger.warning("disk cache write failed for %s; invalidating", channel_id)
+                try:
+                    self._disk.delete(channel_id, retry=True)
+                except Exception:
+                    pass
 
     def invalidate(self, channel_id: str) -> None:
         prior = self._entries.pop(channel_id, None)
         if prior is not None:
             self._total_bytes -= prior.size_bytes
+        if self._disk is not None:
+            try:
+                self._disk.delete(channel_id, retry=True)
+            except Exception:
+                pass
 
     def clear(self) -> None:
         self._entries.clear()
         self._total_bytes = 0
+        if self._disk is not None:
+            self._disk.clear()
+
+    def close(self) -> None:
+        """Release the disk-tier file handle. Safe to call without disk enabled."""
+        self._close_disk()
+
+    def _put_memory(self, channel_id: str, entry: ChannelCacheEntry) -> None:
+        """Memory-tier insert + eviction. Caller has already gated on ``enabled``."""
+        prior = self._entries.pop(channel_id, None)
+        if prior is not None:
+            self._total_bytes -= prior.size_bytes
+        self._entries[channel_id] = entry
+        self._total_bytes += entry.size_bytes
+        self._evict_until_under_bound()
 
     def _evict_until_under_bound(self) -> None:
         # ``popitem(last=False)`` drops the oldest entry. A single fresh entry
@@ -150,6 +356,33 @@ def _evict_until_under_bound(self) -> None:
             _, dropped = self._entries.popitem(last=False)
             self._total_bytes -= dropped.size_bytes
 
+    def _open_disk(self, path: str, max_bytes: int) -> None:
+        import diskcache
+
+        os.makedirs(path, exist_ok=True)
+        # ``least-recently-used`` matches the in-memory tier's eviction policy;
+        # statistics/tag_index are off because we only need plain k/v reads.
+        self._disk = diskcache.Cache(
+            directory=path,
+            size_limit=max_bytes,
+            eviction_policy="least-recently-used",
+            statistics=0,
+            tag_index=0,
+        )
+        self._disk_path = path
+        self._disk_max_bytes = max_bytes
+
+    def _close_disk(self) -> None:
+        if self._disk is None:
+            return
+        try:
+            self._disk.close()
+        except Exception:
+            pass
+        self._disk = None
+        self._disk_path = None
+        self._disk_max_bytes = None
+
 
 class DataLowLevelClient(LowLevelClientBase, WithGrpcClient):
     """Low-level client for fetching channel data.
@@ -162,16 +395,27 @@ def __init__(
         grpc_client: GrpcClient,
         *,
         data_cache_max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES,
+        disk_cache_path: str | os.PathLike[str] | None = None,
+        disk_cache_max_bytes: int | None = None,
     ):
         """Initialize the DataLowLevelClient.
 
         Args:
             grpc_client: The gRPC client to use for making API calls.
             data_cache_max_bytes: Cap on the in-memory channel-data cache (bytes).
-                Set to ``0`` to disable caching. See ``ChannelCache``.
+                Set to ``0`` to disable in-memory caching. See ``ChannelCache``.
+            disk_cache_path: Directory for the disk-backed second-chance tier.
+                ``None`` disables disk persistence. See ``ChannelCache``.
+            disk_cache_max_bytes: Byte cap for the disk tier. ``None`` uses
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES``. Ignored when
+                ``disk_cache_path`` is ``None``.
         """
         super().__init__(grpc_client)
-        self.channel_cache = ChannelCache(max_bytes=data_cache_max_bytes)
+        self.channel_cache = ChannelCache(
+            max_bytes=data_cache_max_bytes,
+            disk_path=disk_cache_path,
+            disk_max_bytes=disk_cache_max_bytes,
+        )
 
     def _update_name_id_map(self, channels: list[Channel]):
         """Update the name id map with the new channels."""
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index 1f7f022fb..16c99a6dd 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -1,9 +1,13 @@
 """Tests for :mod:`sift_client._internal.low_level_wrappers.data`.
 
-Four classes, narrowest scope first:
+Six classes, narrowest scope first:
 
 * :class:`TestChannelCache` — pure ``ChannelCache`` unit tests (byte
   accounting, LRU promotion, eviction).
+* :class:`TestChannelCacheDisk` — disk-backed second-chance tier
+  (fresh open, cross-session reload, fall-through reads, disable).
+* :class:`TestChannelCacheClearDisk` — ``ChannelCache.clear_disk``
+  classmethod (default path, custom path, safety guard).
 * :class:`TestMergePages` — ``DataLowLevelClient._merge_pages``, the
   per-channel concat helper.
 * :class:`TestDataLowLevelClient` — constructor wiring and per-instance
@@ -307,6 +311,219 @@ def test_repeated_concat_updates_stay_under_bound(self) -> None:
             assert _invariant_holds(cache)
 
 
+class TestChannelCacheDisk:
+    """Disk-backed second-chance tier of :class:`ChannelCache`.
+
+    Three things must hold across these tests:
+
+    1. A fresh disk directory starts empty and accepts new writes.
+    2. Closing a populated cache and reopening at the same path surfaces
+       the previous entries on read (the "previous session" requirement).
+    3. The two tiers stay consistent across ``invalidate``/``clear`` and
+       ``disable_disk``, so the disk tier never becomes a stale shadow of
+       memory.
+
+    All tests confine writes to ``tmp_path`` so nothing leaks into the real
+    ``/tmp/sift-channel-data-cache``.
+    """
+
+    def test_disabled_by_default(self) -> None:
+        """No ``disk_path`` → disk tier stays off and untouched."""
+        cache = ChannelCache(max_bytes=10_000_000)
+        assert cache.disk_enabled is False
+        assert cache.disk_path is None
+        assert cache.disk_max_bytes is None
+
+    def test_fresh_cache_writes_and_reads(self, tmp_path) -> None:
+        """A fresh disk directory accepts writes and serves them back."""
+        path = tmp_path / "fresh"
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        try:
+            assert cache.disk_enabled
+            assert cache.disk_path == str(path)
+            assert cache.disk_max_bytes == ChannelCache.DEFAULT_DISK_MAX_BYTES
+            entry = _entry(rows=8)
+            cache.put("chan-1", entry)
+            # Same instance: memory hit takes precedence; disk is just a copy.
+            assert "chan-1" in cache
+            got = cache.get("chan-1")
+            assert got is not None
+            pd.testing.assert_frame_equal(got.data, entry.data)
+        finally:
+            cache.close()
+
+    def test_reopen_existing_dir_sees_prior_session_entries(self, tmp_path) -> None:
+        """Closing then reopening at the same path makes prior entries hit.
+
+        This is the "look for existing caches from previous sessions"
+        guarantee: a new ``ChannelCache`` with an empty in-memory tier
+        finds entries on disk and promotes them into memory on first read.
+        """
+        path = tmp_path / "prev-session"
+        df = _frame("chan-1", rows=12, freq="s")
+        original_entry = _new_cache_entry(
+            data=df,
+            start_time=df.index[0].to_pydatetime(),
+            end_time=df.index[-1].to_pydatetime(),
+        )
+        # Session 1: populate and close.
+        session1 = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        session1.put("chan-1", original_entry)
+        session1.close()
+
+        # Session 2: fresh process simulated by a brand-new ChannelCache.
+        # Memory starts empty, but ``__contains__`` reports the entry from
+        # disk and ``get`` returns it with bytes intact.
+        session2 = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        try:
+            assert len(session2) == 0  # in-memory tier starts cold
+            assert "chan-1" in session2  # disk-backed contains
+            got = session2.get("chan-1")
+            assert got is not None
+            pd.testing.assert_frame_equal(got.data, original_entry.data)
+            assert got.start_time == original_entry.start_time
+            assert got.end_time == original_entry.end_time
+            # After the disk hit, the entry is now promoted into memory.
+            assert len(session2) == 1
+        finally:
+            session2.close()
+
+    def test_disk_hit_promotes_into_memory(self, tmp_path) -> None:
+        """A disk-only entry becomes a memory entry after one ``get``."""
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "promote")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            # Drop from memory only (simulate eviction).
+            del cache._entries["chan-1"]
+            cache._total_bytes = 0
+            assert "chan-1" in cache  # still on disk
+            assert cache.get("chan-1") is not None
+            assert "chan-1" in cache._entries  # promoted back into memory
+        finally:
+            cache.close()
+
+    def test_disk_only_when_memory_disabled(self, tmp_path) -> None:
+        """``max_bytes=0`` (no memory) still routes writes/reads through disk.
+
+        Cold-storage configuration: caller wants persistence without
+        paying the in-memory footprint.
+        """
+        cache = ChannelCache(max_bytes=0, disk_path=tmp_path / "disk-only")
+        try:
+            assert not cache.enabled
+            assert cache.disk_enabled
+            cache.put("chan-1", _entry(rows=4))
+            assert "chan-1" not in cache._entries  # never landed in memory
+            got = cache.get("chan-1")
+            assert got is not None
+            assert "chan-1" not in cache._entries  # memory still bypassed
+        finally:
+            cache.close()
+
+    def test_invalidate_clears_both_tiers(self, tmp_path) -> None:
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "inval")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            cache.invalidate("chan-1")
+            assert "chan-1" not in cache._entries
+            assert "chan-1" not in cache  # contains() must check disk too
+        finally:
+            cache.close()
+
+    def test_clear_wipes_both_tiers(self, tmp_path) -> None:
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "clear")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            cache.put("chan-2", _entry(rows=4))
+            cache.clear()
+            assert len(cache) == 0
+            assert "chan-1" not in cache
+            assert "chan-2" not in cache
+        finally:
+            cache.close()
+
+    def test_disable_disk_preserves_memory(self, tmp_path) -> None:
+        """Turning off disk closes the handle but keeps memory intact."""
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "disable")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            cache.disable_disk()
+            assert not cache.disk_enabled
+            assert cache.disk_path is None
+            # Memory entry survives the disk-tier teardown.
+            assert "chan-1" in cache
+            assert cache.get("chan-1") is not None
+        finally:
+            cache.close()
+
+    def test_enable_disk_reconfigures_path(self, tmp_path) -> None:
+        """Reconfiguring to a different path closes the old handle."""
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "a")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            cache.enable_disk(path=tmp_path / "b")
+            assert cache.disk_path == str(tmp_path / "b")
+            # The new disk dir is fresh: nothing on disk yet under the new path.
+            # ``chan-1`` is still in memory, so __contains__ is still True.
+            assert "chan-1" in cache
+            # But the new disk dir is empty; drop from memory and the
+            # contains check now relies on disk, which won't find it.
+            del cache._entries["chan-1"]
+            cache._total_bytes = 0
+            assert "chan-1" not in cache
+        finally:
+            cache.close()
+
+    def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
+        """Re-enabling with identical settings doesn't churn the disk handle."""
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "noop")
+        try:
+            handle_before = cache._disk
+            cache.enable_disk(path=tmp_path / "noop", max_bytes=ChannelCache.DEFAULT_DISK_MAX_BYTES)
+            # Same handle, no reopen.
+            assert cache._disk is handle_before
+        finally:
+            cache.close()
+
+
+class TestChannelCacheClearDisk:
+    """``ChannelCache.clear_disk`` removes a cache dir, refuses other dirs.
+
+    The classmethod is the source of truth that the resource-level
+    ``ChannelsAPIAsync.clear_data_cache_on_disk`` proxies through, so it
+    must be defensive against pointing at the wrong directory.
+    """
+
+    def test_clear_removes_directory(self, tmp_path) -> None:
+        path = tmp_path / "victim"
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        cache.put("chan-1", _entry(rows=4))
+        cache.close()
+        assert path.exists()
+        ChannelCache.clear_disk(path)
+        assert not path.exists()
+
+    def test_clear_missing_path_is_noop(self, tmp_path) -> None:
+        ChannelCache.clear_disk(tmp_path / "never-existed")  # no raise
+
+    def test_clear_refuses_non_diskcache_directory(self, tmp_path) -> None:
+        """A typo'd path with unrelated contents must not be wiped."""
+        target = tmp_path / "user-stuff"
+        target.mkdir()
+        (target / "important.txt").write_text("don't delete me")
+        with pytest.raises(ValueError, match="does not look like a sift channel data cache"):
+            ChannelCache.clear_disk(target)
+        # Unrelated contents preserved.
+        assert (target / "important.txt").read_text() == "don't delete me"
+
+    def test_default_path_constant_under_tmp(self) -> None:
+        """Default lives under the OS tmp dir, not a user directory."""
+        import tempfile
+
+        assert ChannelCache.DEFAULT_DISK_PATH.startswith(tempfile.gettempdir())
+        assert ChannelCache.DEFAULT_DISK_PATH.endswith("sift-channel-data-cache")
+
+
 class TestMergePages:
     """Behaviour of :meth:`DataLowLevelClient._merge_pages`.
 
diff --git a/python/lib/sift_client/_tests/resources/test_channels.py b/python/lib/sift_client/_tests/resources/test_channels.py
index 0bc3e1122..e3d29ab73 100644
--- a/python/lib/sift_client/_tests/resources/test_channels.py
+++ b/python/lib/sift_client/_tests/resources/test_channels.py
@@ -538,3 +538,90 @@ def test_negative_raises(self):
         api = _make_api()
         with pytest.raises(ValueError, match="max_bytes"):
             api.configure_data_cache(max_bytes=-1)
+
+
+class TestEnableDataCacheDisk:
+    """``enable_data_cache_disk`` / ``disable_data_cache_disk`` plumb the disk
+    tier setting to the underlying ``ChannelCache``, both pre- and post-init.
+
+    The disk tier itself is exercised directly in
+    ``test_data.py::TestChannelCacheDisk``; the tests here just verify the
+    resource-level wiring around it.
+    """
+
+    def test_disabled_by_default(self):
+        api = _make_api()
+        api._ensure_data_low_level_client()
+        assert not api._data_low_level_client.channel_cache.disk_enabled
+
+    def test_enable_before_lazy_init_propagates(self, tmp_path):
+        api = _make_api()
+        api.enable_data_cache_disk(path=str(tmp_path / "pre-init"), max_bytes=4096)
+        api._ensure_data_low_level_client()
+        cache = api._data_low_level_client.channel_cache
+        try:
+            assert cache.disk_enabled
+            assert cache.disk_path == str(tmp_path / "pre-init")
+            assert cache.disk_max_bytes == 4096
+        finally:
+            cache.close()
+
+    def test_enable_after_lazy_init_updates_live_cache(self, tmp_path):
+        api = _make_api()
+        api._ensure_data_low_level_client()
+        cache = api._data_low_level_client.channel_cache
+        try:
+            assert not cache.disk_enabled
+            api.enable_data_cache_disk(path=str(tmp_path / "post-init"))
+            assert cache.disk_enabled
+            assert cache.disk_path == str(tmp_path / "post-init")
+        finally:
+            cache.close()
+
+    def test_enable_with_default_path_lands_on_default(self, monkeypatch, tmp_path):
+        """Calling ``enable_data_cache_disk()`` with no args uses the default path.
+
+        Redirects ``ChannelCache.DEFAULT_DISK_PATH`` to ``tmp_path`` so the
+        test doesn't create the real ``/tmp/sift-channel-data-cache``
+        directory.
+        """
+        from sift_client._internal.low_level_wrappers.data import ChannelCache
+
+        fake_default = str(tmp_path / "fake-default")
+        monkeypatch.setattr(ChannelCache, "DEFAULT_DISK_PATH", fake_default)
+
+        api = _make_api()
+        api.enable_data_cache_disk()
+        api._ensure_data_low_level_client()
+        cache = api._data_low_level_client.channel_cache
+        try:
+            assert cache.disk_path == fake_default
+        finally:
+            cache.close()
+
+    def test_disable_closes_live_disk_handle(self, tmp_path):
+        api = _make_api()
+        api.enable_data_cache_disk(path=str(tmp_path / "to-close"))
+        api._ensure_data_low_level_client()
+        cache = api._data_low_level_client.channel_cache
+        try:
+            assert cache.disk_enabled
+            api.disable_data_cache_disk()
+            assert not cache.disk_enabled
+            assert cache.disk_path is None
+        finally:
+            cache.close()
+
+    def test_clear_data_cache_on_disk_proxies_to_cache(self, tmp_path):
+        """The resource method removes the directory by proxying to ChannelCache."""
+        from sift_client._internal.low_level_wrappers.data import ChannelCache
+
+        path = tmp_path / "to-clear"
+        # Populate a real disk-cache directory so the marker check passes.
+        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        cache.close()
+        assert path.exists()
+
+        api = _make_api()
+        api.clear_data_cache_on_disk(path)
+        assert not path.exists()
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index 26ff2da65..91322a65c 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -11,6 +11,7 @@
 from sift_client.util import cel_utils as cel
 
 if TYPE_CHECKING:
+    import os
     import re
     from datetime import datetime
 
@@ -68,6 +69,13 @@ def __init__(self, sift_client: SiftClient):
         # at lazy-init time" so we don't have to import ``data.py`` (and
         # therefore pandas) just to remember the default.
         self._data_cache_max_bytes: int | None = None
+        # Disk-tier configuration, stashed until lazy init (or applied
+        # immediately if the wrapper is already constructed). All three
+        # remain ``None`` / ``False`` when the disk tier is disabled, which
+        # is the default — disk persistence is opt-in.
+        self._disk_cache_enabled: bool = False
+        self._disk_cache_path: str | None = None
+        self._disk_cache_max_bytes: int | None = None
 
     def configure_data_cache(self, *, max_bytes: int) -> None:
         """Configure the in-memory channel data cache used by ``get_data``.
@@ -91,6 +99,81 @@ def configure_data_cache(self, *, max_bytes: int) -> None:
         if self._data_low_level_client is not None:
             self._data_low_level_client.channel_cache.max_bytes = max_bytes
 
+    def enable_data_cache_disk(
+        self,
+        *,
+        path: str | os.PathLike[str] | None = None,
+        max_bytes: int | None = None,
+    ) -> None:
+        """Persist the channel data cache to disk, surviving process restarts.
+
+        The disk-backed tier is a second-chance layer beneath the in-memory
+        cache: on a memory miss, ``get_data`` checks disk before going to the
+        wire. The default path lives under ``tempfile.gettempdir()`` and is
+        shared across sessions, so a re-run of the same workload picks up
+        previously-cached windows without a fetch.
+
+        Safe to call before or after the first ``get_data``. Reconfiguring
+        (different ``path`` or ``max_bytes``) closes the previous disk handle
+        and opens a new one; in-memory contents are preserved across the swap.
+
+        Args:
+            path: Directory to persist the cache to. ``None`` (the default)
+                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at the path
+                become available as cache hits.
+            max_bytes: Byte cap on the disk tier. ``None`` uses
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the bound is
+                reached, ``diskcache``'s LRU eviction takes over.
+
+        Example:
+            client.channels.enable_data_cache_disk()
+            client.channels.enable_data_cache_disk(path="/data/sift-cache")
+            client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
+        """
+        self._disk_cache_enabled = True
+        self._disk_cache_path = str(path) if path is not None else None
+        self._disk_cache_max_bytes = max_bytes
+        if self._data_low_level_client is not None:
+            self._data_low_level_client.channel_cache.enable_disk(path=path, max_bytes=max_bytes)
+
+    def disable_data_cache_disk(self) -> None:
+        """Stop persisting the channel data cache to disk.
+
+        Closes the disk-cache file handle. The on-disk directory is NOT
+        deleted — use :meth:`clear_data_cache_on_disk` to wipe it. In-memory
+        entries are preserved.
+        """
+        self._disk_cache_enabled = False
+        self._disk_cache_path = None
+        self._disk_cache_max_bytes = None
+        if self._data_low_level_client is not None:
+            self._data_low_level_client.channel_cache.disable_disk()
+
+    def clear_data_cache_on_disk(self, path: str | os.PathLike[str] | None = None) -> None:
+        """Delete a previously-persisted on-disk channel data cache directory.
+
+        Drops stale caches from previous sessions, recovers from a corrupt
+        cache, or reclaims disk space. Removes the directory entirely; a
+        future :meth:`enable_data_cache_disk` call at the same path will see
+        a fresh empty cache.
+
+        This is a thin proxy around
+        :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
+        — exposed on the resource so callers don't need to reach into
+        ``_internal`` modules. The underlying classmethod can also be called directly, without a client.
+
+        Args:
+            path: Directory of the cache to clear. ``None`` (the default)
+                targets ``ChannelCache.DEFAULT_DISK_PATH``.
+
+        Raises:
+            ValueError: If ``path`` exists but does not look like a sift
+                channel data cache directory.
+        """
+        from sift_client._internal.low_level_wrappers.data import ChannelCache
+
+        ChannelCache.clear_disk(path)
+
     async def get(
         self,
         *,
@@ -268,13 +351,23 @@ async def unarchive(self, channels: list[str | Channel]) -> None:
     def _ensure_data_low_level_client(self):
         """Ensure that the data low level client is initialized. Separated out like this to not require large dependencies (pandas/pyarrow) for the client if not fetching data."""
         if self._data_low_level_client is None:
-            from sift_client._internal.low_level_wrappers.data import DataLowLevelClient
+            from sift_client._internal.low_level_wrappers.data import (
+                ChannelCache,
+                DataLowLevelClient,
+            )
 
-            # Pass the kwarg only when explicitly configured so the wrapper's
-            # own default (currently 512 MiB) remains the single source of truth.
-            kwargs = {}
+            # Pass each kwarg only when explicitly configured so the wrapper's
+            # own defaults remain the single source of truth.
+            kwargs: dict = {}
             if self._data_cache_max_bytes is not None:
                 kwargs["data_cache_max_bytes"] = self._data_cache_max_bytes
+            if self._disk_cache_enabled:
+                # ``disk_path=None`` means "disabled" to ChannelCache; substitute
+                # the default explicitly so an explicit ``enable_data_cache_disk()``
+                # without a path still opens the disk tier.
+                kwargs["disk_cache_path"] = self._disk_cache_path or ChannelCache.DEFAULT_DISK_PATH
+                if self._disk_cache_max_bytes is not None:
+                    kwargs["disk_cache_max_bytes"] = self._disk_cache_max_bytes
             self._data_low_level_client = DataLowLevelClient(
                 grpc_client=self.client.grpc_client,
                 **kwargs,
diff --git a/python/pyproject.toml b/python/pyproject.toml
index b435022e7..b12c29cb0 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
     "requests~=2.25",
     "requests-toolbelt~=1.0",
     "alive-progress~=3.0",
+    "diskcache~=5.6",
     # May move these to optional dependencies in the future.
     "pandas-stubs>=2.0,<4.0",
     "types-PyYAML~=6.0",
@@ -350,6 +351,12 @@ ignore_errors = true
 [[tool.mypy.overrides]]
 module = "nptdms"
 ignore_missing_imports = true
+
+# diskcache ships without inline type hints or PEP 561 marker. Used by the
+# channel data cache's optional on-disk tier.
+[[tool.mypy.overrides]]
+module = "diskcache"
+ignore_missing_imports = true
 ignore_errors = true
 
 # alive-progress 3.3.0 ships py.typed but its `alive_it` signature is too
diff --git a/python/uv.lock b/python/uv.lock
index d152551a9..43c24b552 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.8"
 resolution-markers = [
     "python_full_version >= '3.8.2' and python_full_version < '3.9'",
@@ -638,6 +638,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178, upload-time = "2020-04-20T14:23:36.581Z" },
 ]
 
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" },
+]
+
 [[package]]
 name = "eval-type-backport"
 version = "0.3.1"
@@ -4334,6 +4343,7 @@ source = { editable = "." }
 dependencies = [
     { name = "alive-progress", version = "3.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" },
     { name = "alive-progress", version = "3.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" },
+    { name = "diskcache" },
     { name = "eval-type-backport" },
     { name = "filelock", version = "3.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" },
     { name = "filelock", version = "3.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" },
@@ -4562,6 +4572,7 @@ requires-dist = [
     { name = "cffi", marker = "extra == 'dev-all'", specifier = "~=1.14" },
     { name = "cffi", marker = "extra == 'docs-build'", specifier = "~=1.14" },
     { name = "cffi", marker = "extra == 'openssl'", specifier = "~=1.14" },
+    { name = "diskcache", specifier = "~=5.6" },
     { name = "eval-type-backport", specifier = "~=0.2" },
     { name = "filelock", specifier = "~=3.15" },
     { name = "googleapis-common-protos", specifier = ">=1.60" },

From 4737ae30c37f7ea24b03f546144a3d72522ca9f5 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Wed, 24 Jun 2026 14:36:35 -0700
Subject: [PATCH 08/14] stubs

---
 .../resources/sync_stubs/__init__.pyi         | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index c37c3aed3..704e3b8c0 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -5,6 +5,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
+    import os
     import re
     from datetime import datetime, timedelta
     from pathlib import Path
@@ -452,6 +453,86 @@ class ChannelsAPI:
         """
         ...
 
+    def clear_data_cache_on_disk(self, path: str | os.PathLike[str] | None = None) -> None:
+        """Delete a previously-persisted on-disk channel data cache directory.
+
+        Drops stale caches from previous sessions, recovers from a corrupt
+        cache, or reclaims disk space. Removes the directory entirely; a
+        future :meth:`enable_data_cache_disk` call at the same path will see
+        a fresh empty cache.
+
+        This is a thin proxy around
+        :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
+        — exposed on the resource so callers don't need to reach into
+        ``_internal`` modules. The underlying classmethod can also be called directly, without a client.
+
+        Args:
+            path: Directory of the cache to clear. ``None`` (the default)
+                targets ``ChannelCache.DEFAULT_DISK_PATH``.
+
+        Raises:
+            ValueError: If ``path`` exists but does not look like a sift
+                channel data cache directory.
+        """
+        ...
+
+    def configure_data_cache(self, *, max_bytes: int) -> None:
+        """Configure the in-memory channel data cache used by ``get_data``.
+
+        Args:
+            max_bytes: Byte cap on the cache. ``0`` disables caching
+                (every ``get_data`` call goes to the wire). Defaults to
+                512 MiB until explicitly configured. Must be ``>= 0``.
+
+        Safe to call before or after the first ``get_data``. If the cache is
+        already live, the new cap is applied immediately and least-recently-
+        used entries are evicted until ``total_bytes`` fits.
+
+        Example:
+            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
+            client.channels.configure_data_cache(max_bytes=0)  # disable
+        """
+        ...
+
+    def disable_data_cache_disk(self) -> None:
+        """Stop persisting the channel data cache to disk.
+
+        Closes the disk-cache file handle. The on-disk directory is NOT
+        deleted — use :meth:`clear_data_cache_on_disk` to wipe it. In-memory
+        entries are preserved.
+        """
+        ...
+
+    def enable_data_cache_disk(
+        self, *, path: str | os.PathLike[str] | None = None, max_bytes: int | None = None
+    ) -> None:
+        """Persist the channel data cache to disk, surviving process restarts.
+
+        The disk-backed tier is a second-chance layer beneath the in-memory
+        cache: on a memory miss, ``get_data`` checks disk before going to the
+        wire. The default path lives under ``tempfile.gettempdir()`` and is
+        shared across sessions, so a re-run of the same workload picks up
+        previously-cached windows without a fetch.
+
+        Safe to call before or after the first ``get_data``. Reconfiguring
+        (different ``path`` or ``max_bytes``) closes the previous disk handle
+        and opens a new one; in-memory contents are preserved across the swap.
+
+        Args:
+            path: Directory to persist the cache to. ``None`` (the default)
+                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at the path
+                become available as cache hits.
+            max_bytes: Byte cap on the disk tier. ``None`` uses
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the bound is
+                reached, ``diskcache``'s LRU eviction takes over.
+
+        Example:
+            client.channels.enable_data_cache_disk()
+            client.channels.enable_data_cache_disk(path="/data/sift-cache")
+            client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
+        """
+        ...
+
     def find(self, **kwargs) -> Channel | None:
         """Find a single channel matching the given query. Takes the same arguments as `list`. If more than one channel is found,
         raises an error.

From cf46f32b9983d860f3f987945386a3fac939a574 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 13:01:32 -0700
Subject: [PATCH 09/14] Move disk cache out of resource.

---
 python/CHANGELOG.md                           |  24 ++--
 .../_internal/disk_cache_config.py            |  87 ++++++++++++++
 .../_internal/low_level_wrappers/test_data.py |  19 ++-
 .../_internal/test_disk_cache_config.py       | 112 ++++++++++++++++++
 python/lib/sift_client/_tests/conftest.py     |  27 +++++
 .../_tests/resources/test_channels.py         |  88 ++++++++++++--
 python/lib/sift_client/client.py              |   4 -
 python/lib/sift_client/resources/channels.py  | 106 +++++++++++------
 .../resources/sync_stubs/__init__.pyi         |  40 ++++---
 python/pyproject.toml                         |   8 ++
 python/uv.lock                                |   2 +-
 11 files changed, 435 insertions(+), 82 deletions(-)
 create mode 100644 python/lib/sift_client/_internal/disk_cache_config.py
 create mode 100644 python/lib/sift_client/_tests/_internal/test_disk_cache_config.py

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index 8e61faa1b..e33995a8e 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -26,31 +26,27 @@ client.channels.configure_data_cache(max_bytes=0)                  # disable cac
 
 `ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which still caused increased memory usage.
 
-The internal `DataLowLevelClient.channel_cache` is no longer a class attribute. Any external code that relied on `DataLowLevelClient.channel_cache.channels.clear()` as a workaround should remove it — the bounded cache no longer requires manual purging.
+#### On-disk channel data cache (opt-out, on by default)
 
-#### On-disk channel data cache (opt-in)
+The channel data cache now persists to disk by default, surviving process restarts. The disk tier is a second-chance layer beneath the in-memory cache: on a memory miss, `get_data` checks disk before going to the wire. Re-running the same workload in a new session picks up the previously-cached windows for free — no configuration required.
 
-The channel data cache can now optionally persist to disk, surviving process restarts. The disk tier is a second-chance layer beneath the in-memory cache: on a memory miss, `get_data` checks disk before going to the wire. Re-running the same workload in a new session picks up the previously-cached windows for free.
+The default location is `<tempfile.gettempdir()>/sift-channel-data-cache`, capped at 4 GiB with LRU eviction. If the default path can't be opened (read-only filesystem, restricted container, etc.), the client logs a warning and falls back to the in-memory cache only — `get_data` continues to work.
 
-```python
-# Enable disk persistence at the default tmp location.
-client.channels.enable_data_cache_disk()
-
-# Or pick a custom directory and byte cap.
-client.channels.enable_data_cache_disk(path="/data/sift-cache", max_bytes=2 * 1024 ** 3)
+Opt out, reconfigure, or wipe the on-disk cache from the `channels` resource:
 
-# Stop persisting (does not delete on-disk data).
+```python
+# Opt out — no data persisted to disk.
 client.channels.disable_data_cache_disk()
-```
 
-To remove a stale cache directory from a previous session:
+# Reconfigure the location or byte cap.
+client.channels.enable_data_cache_disk(path="/data/sift-cache", max_bytes=2 * 1024 ** 3)
 
-```python
+# Remove a stale or corrupted cache directory.
 client.channels.clear_data_cache_on_disk()                   # default tmp path
 client.channels.clear_data_cache_on_disk("/data/sift-cache") # custom path
 ```
 
-`clear_data_cache_on_disk` refuses to delete directories that don't look like a sift channel data cache (missing the `diskcache` marker), so a typo'd path won't wipe unrelated data.
+`enable_data_cache_disk` is also the way to turn the tier back on after a prior `disable_data_cache_disk` call.
 
 The disk tier is powered by [`diskcache`](https://grantjenks.com/docs/diskcache/) (pure-Python, SQLite-backed) and has its own independent byte cap with LRU eviction. The in-memory tier remains the fast path — disk is only consulted on a memory miss.
 
diff --git a/python/lib/sift_client/_internal/disk_cache_config.py b/python/lib/sift_client/_internal/disk_cache_config.py
new file mode 100644
index 000000000..c49eaf442
--- /dev/null
+++ b/python/lib/sift_client/_internal/disk_cache_config.py
@@ -0,0 +1,87 @@
+"""User-expressed configuration for a resource's optional disk-cache tier."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import os
+
+
+class DiskCacheConfig:
+    """Holds a resource's disk-cache enable/path/max-bytes intent.
+
+    Resources own one instance, mutate it via :meth:`enable` / :meth:`disable`
+    in response to user calls, and read the properties at lazy-init time to
+    decide what kwargs to forward to their cache-aware wrapper.
+
+    The :attr:`using_default_path` property is the key invariant for the
+    silent-fallback-vs-loud-raise distinction in resource lazy-init code:
+    if the user picked a specific path and opening fails, the failure
+    surfaces; if the user left the default and opening fails, the resource
+    falls back to memory-only without disrupting the call.
+
+    Args:
+        enabled: Initial enabled state. Pass ``True`` for opt-out (the disk
+            tier is on by default and users call ``disable`` to turn it off);
+            pass ``False`` for opt-in (users call ``enable`` to turn it on).
+    """
+
+    def __init__(self, *, enabled: bool = True) -> None:
+        self._enabled = enabled
+        self._path: str | None = None
+        self._max_bytes: int | None = None
+
+    @property
+    def enabled(self) -> bool:
+        """Whether the disk tier should be opened on the next lazy init."""
+        return self._enabled
+
+    @property
+    def path(self) -> str | None:
+        """User-supplied disk-cache path, or ``None`` to defer to the cache's default."""
+        return self._path
+
+    @property
+    def max_bytes(self) -> int | None:
+        """User-supplied disk-cache byte cap, or ``None`` to defer to the cache's default."""
+        return self._max_bytes
+
+    @property
+    def using_default_path(self) -> bool:
+        """``True`` when the disk tier is enabled *and* the path is the cache's default.
+
+        Resources use this to decide whether to silently fall back to memory
+        on a disk-open failure (default path: the user didn't ask for it
+        specifically, so degrade gracefully) or to re-raise (explicit path:
+        the user asked for it, so failure must surface).
+        """
+        return self._enabled and self._path is None
+
+    def enable(
+        self,
+        *,
+        path: str | os.PathLike[str] | None = None,
+        max_bytes: int | None = None,
+    ) -> None:
+        """Mark the disk tier as enabled, optionally with a custom path or byte cap.
+
+        Args:
+            path: Directory to persist to. ``None`` leaves the cache's
+                default in effect.
+            max_bytes: Byte cap on the disk tier. ``None`` leaves the
+                cache's default in effect.
+        """
+        self._enabled = True
+        self._path = str(path) if path is not None else None
+        self._max_bytes = max_bytes
+
+    def disable(self) -> None:
+        """Mark the disk tier as disabled and clear any custom path / byte cap.
+
+        Subsequent :meth:`enable` calls re-enable at the cache's defaults
+        unless overrides are supplied.
+        """
+        self._enabled = False
+        self._path = None
+        self._max_bytes = None
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index 16c99a6dd..0ace402d2 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -42,6 +42,13 @@
 _NOW = datetime(2025, 1, 1, tzinfo=timezone.utc)
 _WINDOW_END = _NOW + timedelta(days=1)
 
+# Snapshot of the real ``DEFAULT_DISK_PATH`` constant captured at module import.
+# The autouse ``_isolate_default_disk_cache_path`` fixture in ``conftest.py``
+# overrides the class attribute on every test for isolation; the
+# ``TestChannelCacheClearDisk::test_default_path_constant_under_tmp`` test still
+# needs to see the production value to verify its shape.
+_PRODUCTION_DEFAULT_DISK_PATH = ChannelCache.DEFAULT_DISK_PATH
+
 
 # ---------- shared helpers -----------
 
@@ -517,11 +524,17 @@ def test_clear_refuses_non_diskcache_directory(self, tmp_path) -> None:
         assert (target / "important.txt").read_text() == "don't delete me"
 
     def test_default_path_constant_under_tmp(self) -> None:
-        """Default lives under the OS tmp dir, not a user directory."""
+        """Default lives under the OS tmp dir, not a user directory.
+
+        Reads the module-level snapshot captured at import time rather than
+        ``ChannelCache.DEFAULT_DISK_PATH`` directly, because the autouse
+        ``_isolate_default_disk_cache_path`` fixture monkeypatches that
+        attribute for every test to keep ``/tmp`` clean.
+        """
         import tempfile
 
-        assert ChannelCache.DEFAULT_DISK_PATH.startswith(tempfile.gettempdir())
-        assert ChannelCache.DEFAULT_DISK_PATH.endswith("sift-channel-data-cache")
+        assert _PRODUCTION_DEFAULT_DISK_PATH.startswith(tempfile.gettempdir())
+        assert _PRODUCTION_DEFAULT_DISK_PATH.endswith("sift-channel-data-cache")
 
 
 class TestMergePages:
diff --git a/python/lib/sift_client/_tests/_internal/test_disk_cache_config.py b/python/lib/sift_client/_tests/_internal/test_disk_cache_config.py
new file mode 100644
index 000000000..bce8a4ab9
--- /dev/null
+++ b/python/lib/sift_client/_tests/_internal/test_disk_cache_config.py
@@ -0,0 +1,112 @@
+"""Tests for :class:`sift_client._internal.disk_cache_config.DiskCacheConfig`.
+
+The class is a small intent holder; the tests pin three things that
+resource lazy-init code relies on:
+
+* Enable / disable round-trips preserve the right state and clear overrides.
+* ``using_default_path`` reflects "enabled AND no user override", which
+  drives the silent-fallback-vs-loud-raise distinction in resources.
+* ``enable`` accepts ``os.PathLike`` and stringifies it eagerly so consumers
+  never need to handle ``pathlib.Path`` vs ``str``.
+"""
+
+from __future__ import annotations
+
+import pathlib
+
+import pytest
+
+from sift_client._internal.disk_cache_config import DiskCacheConfig
+
+
+class TestDiskCacheConfig:
+    def test_opt_out_initial_state_enabled_no_overrides(self) -> None:
+        """``enabled=True`` (opt-out) starts on with no overrides."""
+        config = DiskCacheConfig(enabled=True)
+        assert config.enabled
+        assert config.path is None
+        assert config.max_bytes is None
+        assert config.using_default_path
+
+    def test_opt_in_initial_state_disabled(self) -> None:
+        """``enabled=False`` (opt-in) starts off; ``using_default_path`` is False."""
+        config = DiskCacheConfig(enabled=False)
+        assert not config.enabled
+        assert config.path is None
+        assert config.max_bytes is None
+        assert not config.using_default_path
+
+    def test_enable_with_no_args_keeps_defaults(self) -> None:
+        """``enable()`` with no args turns the tier on with default path and cap."""
+        config = DiskCacheConfig(enabled=False)
+        config.enable()
+        assert config.enabled
+        assert config.path is None
+        assert config.max_bytes is None
+        assert config.using_default_path
+
+    def test_enable_with_path_marks_non_default(self) -> None:
+        """A user-supplied path flips ``using_default_path`` off."""
+        config = DiskCacheConfig(enabled=True)
+        config.enable(path="/custom/path")
+        assert config.enabled
+        assert config.path == "/custom/path"
+        assert not config.using_default_path
+
+    def test_enable_with_max_bytes_keeps_default_path(self) -> None:
+        """Setting ``max_bytes`` alone doesn't make the path non-default."""
+        config = DiskCacheConfig(enabled=True)
+        config.enable(max_bytes=1024)
+        assert config.enabled
+        assert config.path is None
+        assert config.max_bytes == 1024
+        assert config.using_default_path
+
+    def test_enable_stringifies_pathlike(self) -> None:
+        """``os.PathLike`` inputs are stored as strings so consumers can be dumb."""
+        config = DiskCacheConfig(enabled=True)
+        config.enable(path=pathlib.Path("/some/path"))
+        assert isinstance(config.path, str)
+        assert config.path == "/some/path"
+
+    def test_disable_clears_overrides(self) -> None:
+        """``disable()`` zeroes path and max_bytes so a future re-enable starts clean."""
+        config = DiskCacheConfig(enabled=True)
+        config.enable(path="/custom", max_bytes=4096)
+        config.disable()
+        assert not config.enabled
+        assert config.path is None
+        assert config.max_bytes is None
+        assert not config.using_default_path
+
+    def test_reenable_after_disable_returns_to_defaults(self) -> None:
+        """``disable`` then ``enable()`` (no args) restores the opt-out starting state."""
+        config = DiskCacheConfig(enabled=True)
+        config.enable(path="/custom", max_bytes=4096)
+        config.disable()
+        config.enable()
+        assert config.enabled
+        assert config.path is None
+        assert config.max_bytes is None
+        assert config.using_default_path
+
+    @pytest.mark.parametrize(
+        ("enabled", "path", "expected"),
+        [
+            (True, None, True),
+            (True, "/custom", False),
+            (False, None, False),
+            (False, "/custom", False),  # disabled wins even with a stashed path
+        ],
+        ids=["enabled+default", "enabled+custom", "disabled+default", "disabled+custom"],
+    )
+    def test_using_default_path_matrix(
+        self, enabled: bool, path: str | None, expected: bool
+    ) -> None:
+        """``using_default_path`` is the AND of ``enabled`` and ``path is None``."""
+        config = DiskCacheConfig(enabled=enabled)
+        if path is not None:
+            # Bypass enable() so we can exercise the disabled+custom combo
+            # without enable() flipping enabled back on.
+            config._path = path
+        assert config.using_default_path is expected
diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 41469dac5..31aebf03a 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -9,6 +9,33 @@
 from sift_client.util.util import AsyncAPIs
 
 
+@pytest.fixture(autouse=True)
+def _isolate_default_disk_cache_path(monkeypatch, tmp_path):
+    """Redirect ``ChannelCache.DEFAULT_DISK_PATH`` to a per-test tmp dir.
+
+    The channel data disk cache is **opt-out** — any test that triggers the
+    lazy ``DataLowLevelClient`` init through ``ChannelsAPIAsync`` would
+    otherwise create the real ``/tmp/sift-channel-data-cache`` directory and
+    leak state across runs. Redirecting the default to ``tmp_path`` keeps
+    every test self-contained without each test having to know that the disk
+    tier is on by default.
+
+    The override keeps the ``sift-channel-data-cache`` suffix so the patched
+    path has the same shape as the production default. Tests that need the
+    real constant must snapshot it at import time (see ``test_data.py``).
+
+    Importing ``ChannelCache`` here pulls in pandas, but the import cost is
+    paid only once per session; per test, only the ``monkeypatch`` call runs.
+    """
+    from sift_client._internal.low_level_wrappers.data import ChannelCache
+
+    monkeypatch.setattr(
+        ChannelCache,
+        "DEFAULT_DISK_PATH",
+        str(tmp_path / "sift-channel-data-cache"),
+    )
+
+
 @pytest.fixture(scope="session")
 def sift_client() -> SiftClient:
     """Create a SiftClient instance for testing.
diff --git a/python/lib/sift_client/_tests/resources/test_channels.py b/python/lib/sift_client/_tests/resources/test_channels.py
index e3d29ab73..3ed3826b1 100644
--- a/python/lib/sift_client/_tests/resources/test_channels.py
+++ b/python/lib/sift_client/_tests/resources/test_channels.py
@@ -507,6 +507,11 @@ class TestConfigureDataCache:
     """``configure_data_cache`` is the resource-level knob for the in-memory
     channel data cache. Before the cache is initialized, it stashes the value
     for the lazy-init path; after, it retunes the live cache.
+
+    Each test that triggers ``_ensure_data_low_level_client`` opens the
+    opt-out disk tier (redirected to ``tmp_path`` by the conftest fixture)
+    and closes the handle in ``finally`` so the diskcache lock doesn't leak
+    into the next test.
     """
 
     def test_before_lazy_init_propagates_to_cache(self):
@@ -515,24 +520,33 @@ def test_before_lazy_init_propagates_to_cache(self):
         api.configure_data_cache(max_bytes=123)
         assert api._data_low_level_client is None  # still lazy
         api._ensure_data_low_level_client()
-        assert api._data_low_level_client.channel_cache.max_bytes == 123
+        try:
+            assert api._data_low_level_client.channel_cache.max_bytes == 123
+        finally:
+            api._data_low_level_client.channel_cache.close()
 
     def test_after_lazy_init_updates_live_cache(self):
         """Configuring after first use retunes the live cache in place."""
         api = _make_api()
         api._ensure_data_low_level_client()
-        original_client = api._data_low_level_client
-        api.configure_data_cache(max_bytes=456)
-        # Same wrapper instance — we mutated, not replaced.
-        assert api._data_low_level_client is original_client
-        assert api._data_low_level_client.channel_cache.max_bytes == 456
+        try:
+            original_client = api._data_low_level_client
+            api.configure_data_cache(max_bytes=456)
+            # Same wrapper instance — we mutated, not replaced.
+            assert api._data_low_level_client is original_client
+            assert api._data_low_level_client.channel_cache.max_bytes == 456
+        finally:
+            api._data_low_level_client.channel_cache.close()
 
     def test_zero_disables_cache_via_resource(self):
         """Resource-level ``max_bytes=0`` end-to-end disables the cache."""
         api = _make_api()
         api.configure_data_cache(max_bytes=0)
         api._ensure_data_low_level_client()
-        assert not api._data_low_level_client.channel_cache.enabled
+        try:
+            assert not api._data_low_level_client.channel_cache.enabled
+        finally:
+            api._data_low_level_client.channel_cache.close()
 
     def test_negative_raises(self):
         api = _make_api()
@@ -549,10 +563,24 @@ class TestEnableDataCacheDisk:
     resource-level wiring around it.
     """
 
-    def test_disabled_by_default(self):
+    def test_enabled_by_default(self):
+        """Disk persistence is opt-out: the default-constructed resource
+        lands at ``ChannelCache.DEFAULT_DISK_PATH`` on first ``get_data``.
+
+        The autouse ``_isolate_default_disk_cache_path`` fixture in
+        ``conftest.py`` redirects the constant to a per-test tmp dir so this
+        doesn't litter the real ``/tmp``.
+        """
+        from sift_client._internal.low_level_wrappers.data import ChannelCache
+
         api = _make_api()
         api._ensure_data_low_level_client()
-        assert not api._data_low_level_client.channel_cache.disk_enabled
+        cache = api._data_low_level_client.channel_cache
+        try:
+            assert cache.disk_enabled
+            assert cache.disk_path == ChannelCache.DEFAULT_DISK_PATH
+        finally:
+            cache.close()
 
     def test_enable_before_lazy_init_propagates(self, tmp_path):
         api = _make_api()
@@ -567,7 +595,13 @@ def test_enable_before_lazy_init_propagates(self, tmp_path):
             cache.close()
 
     def test_enable_after_lazy_init_updates_live_cache(self, tmp_path):
+        """``disable_data_cache_disk`` → ``enable_data_cache_disk`` round-trip
+        on a live cache swaps the disk handle without recreating the wrapper.
+        """
         api = _make_api()
+        # Start from the disk-off state so the test exercises the "off → on"
+        # transition rather than "default-on → reconfigured-on".
+        api.disable_data_cache_disk()
         api._ensure_data_low_level_client()
         cache = api._data_low_level_client.channel_cache
         try:
@@ -625,3 +659,39 @@ def test_clear_data_cache_on_disk_proxies_to_cache(self, tmp_path):
         api = _make_api()
         api.clear_data_cache_on_disk(path)
         assert not path.exists()
+
+    def test_default_path_failure_falls_back_to_memory(self, monkeypatch, tmp_path):
+        """If the opt-out default disk path can't be opened, the wrapper logs
+        a warning and continues with the in-memory cache only.
+
+        Simulated by pointing ``DEFAULT_DISK_PATH`` at a path that already
+        exists as a regular file — ``os.makedirs(..., exist_ok=True)`` raises
+        ``FileExistsError`` for non-directory targets.
+        """
+        from sift_client._internal.low_level_wrappers.data import ChannelCache
+
+        blocker = tmp_path / "not-a-dir"
+        blocker.write_text("i am a file, not a directory")
+        monkeypatch.setattr(ChannelCache, "DEFAULT_DISK_PATH", str(blocker))
+
+        api = _make_api()
+        api._ensure_data_low_level_client()  # must not raise
+        cache = api._data_low_level_client.channel_cache
+        try:
+            # Disk tier dropped (with a logged warning), memory still working.
+            assert not cache.disk_enabled
+            assert cache.enabled
+        finally:
+            cache.close()
+
+    def test_explicit_path_failure_propagates(self, tmp_path):
+        """An explicit ``enable_data_cache_disk(path=...)`` that can't open
+        propagates the OSError — silent fallback would hide a user mistake.
+        """
+        blocker = tmp_path / "not-a-dir"
+        blocker.write_text("i am a file, not a directory")
+
+        api = _make_api()
+        api.enable_data_cache_disk(path=str(blocker))
+        with pytest.raises(FileExistsError):
+            api._ensure_data_low_level_client()
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 6afc36386..5db5bf473 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -149,10 +149,6 @@ def __init__(
                 mapped to a frontend automatically; see the ``app_url`` property.
                 A value here takes precedence over ``connection_config.app_url``.
 
-        Resource-specific knobs live on the resource itself. For example,
-        to tune the channel data cache used by ``client.channels.get_data``::
-
-            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
         """
         if not (api_key and grpc_url and rest_url) and not connection_config:
             raise ValueError(
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index 91322a65c..794930fda 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import logging
 from typing import TYPE_CHECKING
 
+from sift_client._internal.disk_cache_config import DiskCacheConfig
 from sift_client._internal.low_level_wrappers.channels import ChannelsLowLevelClient
 from sift_client._internal.low_level_wrappers.units import UnitsLowLevelClient
 from sift_client.resources._base import ResourceBase
@@ -20,6 +22,8 @@
 
     from sift_client.client import SiftClient
 
+logger = logging.getLogger(__name__)
+
 
 def _channel_ids_from_list(items: list[str | Channel]) -> list[str]:
     """Resolve a list of channel IDs or Channel objects to a list of channel IDs.
@@ -69,13 +73,7 @@ def __init__(self, sift_client: SiftClient):
         # at lazy-init time" so we don't have to import ``data.py`` (and
         # therefore pandas) just to remember the default.
         self._data_cache_max_bytes: int | None = None
-        # Disk-tier configuration, stashed until lazy init (or applied
-        # immediately if the wrapper is already constructed). All three
-        # remain ``None`` / ``False`` when the disk tier is disabled, which
-        # is the default — disk persistence is opt-in.
-        self._disk_cache_enabled: bool = False
-        self._disk_cache_path: str | None = None
-        self._disk_cache_max_bytes: int | None = None
+        self._disk_cache_config = DiskCacheConfig(enabled=True)
 
     def configure_data_cache(self, *, max_bytes: int) -> None:
         """Configure the in-memory channel data cache used by ``get_data``.
@@ -105,7 +103,11 @@ def enable_data_cache_disk(
         path: str | os.PathLike[str] | None = None,
         max_bytes: int | None = None,
     ) -> None:
-        """Persist the channel data cache to disk, surviving process restarts.
+        """Configure (or re-enable after ``disable_data_cache_disk``) the disk cache.
+
+        Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
+        use this method when you want to override the path or size, or to turn
+        the tier back on after a prior ``disable_data_cache_disk`` call.
 
         The disk-backed tier is a second-chance layer beneath the in-memory
         cache: on a memory miss, ``get_data`` checks disk before going to the
@@ -117,35 +119,37 @@ def enable_data_cache_disk(
         (different ``path`` or ``max_bytes``) closes the previous disk handle
         and opens a new one; in-memory contents are preserved across the swap.
 
+        An explicit ``path`` that can't be opened (e.g. permission denied,
+        read-only filesystem) raises so the caller knows the request didn't
+        take. The default-path open does *not* raise — see
+        ``_ensure_data_low_level_client`` for the fall-back-to-memory path.
+
         Args:
             path: Directory to persist the cache to. ``None`` (the default)
-                uses ``DEFAULT_DISK_CACHE_PATH``. Existing entries at the path
-                become available as cache hits.
+                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
+                the path become available as cache hits.
             max_bytes: Byte cap on the disk tier. ``None`` uses
-                ``DEFAULT_DISK_CACHE_MAX_BYTES`` (4 GiB). When the bound is
-                reached, ``diskcache``'s LRU eviction takes over.
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
+                bound is reached, ``diskcache``'s LRU eviction takes over.
 
         Example:
-            client.channels.enable_data_cache_disk()
             client.channels.enable_data_cache_disk(path="/data/sift-cache")
             client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
         """
-        self._disk_cache_enabled = True
-        self._disk_cache_path = str(path) if path is not None else None
-        self._disk_cache_max_bytes = max_bytes
+        self._disk_cache_config.enable(path=path, max_bytes=max_bytes)
         if self._data_low_level_client is not None:
             self._data_low_level_client.channel_cache.enable_disk(path=path, max_bytes=max_bytes)
 
     def disable_data_cache_disk(self) -> None:
-        """Stop persisting the channel data cache to disk.
+        """Opt out of disk persistence for the channel data cache.
 
-        Closes the disk-cache file handle. The on-disk directory is NOT
-        deleted — use :meth:`clear_data_cache_on_disk` to wipe it. In-memory
-        entries are preserved.
+        Disk persistence is on by default; call this when you don't want any
+        cached data written to disk. Closes any open disk-cache file handle.
+        The on-disk directory is NOT deleted — use
+        :meth:`clear_data_cache_on_disk` to wipe it. In-memory entries are
+        preserved.
         """
-        self._disk_cache_enabled = False
-        self._disk_cache_path = None
-        self._disk_cache_max_bytes = None
+        self._disk_cache_config.disable()
         if self._data_low_level_client is not None:
             self._data_low_level_client.channel_cache.disable_disk()
 
@@ -153,14 +157,16 @@ def clear_data_cache_on_disk(self, path: str | os.PathLike[str] | None = None) -
         """Delete a previously-persisted on-disk channel data cache directory.
 
         Drops stale caches from previous sessions, recovers from a corrupt
-        cache, or reclaims disk space. Removes the directory entirely; a
-        future :meth:`enable_data_cache_disk` call at the same path will see
-        a fresh empty cache.
+        cache, or reclaims disk space. Removes the directory entirely; if disk
+        persistence is on, the next ``get_data`` re-opens an empty cache at
+        the same path.
 
         This is a thin proxy around
         :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
         — exposed on the resource so callers don't need to reach into
-        ``_internal`` modules. But that is a class method so the user could call without a client if desired.
+        ``_internal`` modules. The underlying classmethod is also reachable
+        directly (``ChannelCache.clear_disk(...)``) if the caller doesn't have
+        a ``SiftClient`` handy.
 
         Args:
             path: Directory of the cache to clear. ``None`` (the default)
@@ -361,17 +367,43 @@ def _ensure_data_low_level_client(self):
             kwargs: dict = {}
             if self._data_cache_max_bytes is not None:
                 kwargs["data_cache_max_bytes"] = self._data_cache_max_bytes
-            if self._disk_cache_enabled:
+            disk_config = self._disk_cache_config
+            if disk_config.enabled:
                 # ``disk_path=None`` means "disabled" to ChannelCache; substitute
-                # the default explicitly so an explicit ``enable_data_cache_disk()``
-                # without a path still opens the disk tier.
-                kwargs["disk_cache_path"] = self._disk_cache_path or ChannelCache.DEFAULT_DISK_PATH
-                if self._disk_cache_max_bytes is not None:
-                    kwargs["disk_cache_max_bytes"] = self._disk_cache_max_bytes
-            self._data_low_level_client = DataLowLevelClient(
-                grpc_client=self.client.grpc_client,
-                **kwargs,
-            )
+                # the default explicitly so the opt-out default still opens
+                # the disk tier. ``DEFAULT_DISK_PATH`` is read here (not at
+                # config construction) so test fixtures that monkeypatch the
+                # class attribute see the override.
+                kwargs["disk_cache_path"] = disk_config.path or ChannelCache.DEFAULT_DISK_PATH
+                if disk_config.max_bytes is not None:
+                    kwargs["disk_cache_max_bytes"] = disk_config.max_bytes
+            try:
+                self._data_low_level_client = DataLowLevelClient(
+                    grpc_client=self.client.grpc_client,
+                    **kwargs,
+                )
+            except Exception:
+                # Explicit user-supplied disk path failures propagate so the
+                # caller knows their request didn't take. Default-path failures
+                # (read-only ``/tmp``, restricted containers, etc.) log a
+                # warning and degrade to memory-only so ``get_data`` still works.
+                if not disk_config.using_default_path:
+                    raise
+                logger.warning(
+                    "Could not open the default channel data disk cache at %r; "
+                    "falling back to in-memory cache only. Call "
+                    "``client.channels.disable_data_cache_disk()`` to silence "
+                    "this warning, or pass an explicit path via "
+                    "``enable_data_cache_disk(path=...)``.",
+                    kwargs.get("disk_cache_path"),
+                    exc_info=True,
+                )
+                kwargs.pop("disk_cache_path", None)
+                kwargs.pop("disk_cache_max_bytes", None)
+                self._data_low_level_client = DataLowLevelClient(
+                    grpc_client=self.client.grpc_client,
+                    **kwargs,
+                )
 
     async def get_data(
         self,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 704e3b8c0..8e76a56ff 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -457,14 +457,16 @@ class ChannelsAPI:
         """Delete a previously-persisted on-disk channel data cache directory.
 
         Drops stale caches from previous sessions, recovers from a corrupt
-        cache, or reclaims disk space. Removes the directory entirely; a
-        future :meth:`enable_data_cache_disk` call at the same path will see
-        a fresh empty cache.
+        cache, or reclaims disk space. Removes the directory entirely; if disk
+        persistence is on, the next ``get_data`` re-opens an empty cache at
+        the same path.
 
         This is a thin proxy around
         :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
         — exposed on the resource so callers don't need to reach into
-        ``_internal`` modules. But that is a class method so the user could call without a client if desired.
+        ``_internal`` modules. The underlying classmethod is also reachable
+        directly (``ChannelCache.clear_disk(...)``) if the caller doesn't have
+        a ``SiftClient`` handy.
 
         Args:
             path: Directory of the cache to clear. ``None`` (the default)
@@ -495,18 +497,24 @@ class ChannelsAPI:
         ...
 
     def disable_data_cache_disk(self) -> None:
-        """Stop persisting the channel data cache to disk.
+        """Opt out of disk persistence for the channel data cache.
 
-        Closes the disk-cache file handle. The on-disk directory is NOT
-        deleted — use :meth:`clear_data_cache_on_disk` to wipe it. In-memory
-        entries are preserved.
+        Disk persistence is on by default; call this when you don't want any
+        cached data written to disk. Closes any open disk-cache file handle.
+        The on-disk directory is NOT deleted — use
+        :meth:`clear_data_cache_on_disk` to wipe it. In-memory entries are
+        preserved.
         """
         ...
 
     def enable_data_cache_disk(
         self, *, path: str | os.PathLike[str] | None = None, max_bytes: int | None = None
     ) -> None:
-        """Persist the channel data cache to disk, surviving process restarts.
+        """Configure (or re-enable after ``disable_data_cache_disk``) the disk cache.
+
+        Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
+        use this method when you want to override the path or size, or to turn
+        the tier back on after a prior ``disable_data_cache_disk`` call.
 
         The disk-backed tier is a second-chance layer beneath the in-memory
         cache: on a memory miss, ``get_data`` checks disk before going to the
@@ -518,16 +526,20 @@ class ChannelsAPI:
         (different ``path`` or ``max_bytes``) closes the previous disk handle
         and opens a new one; in-memory contents are preserved across the swap.
 
+        An explicit ``path`` that can't be opened (e.g. permission denied,
+        read-only filesystem) raises so the caller knows the request didn't
+        take. The default-path open does *not* raise — see
+        ``_ensure_data_low_level_client`` for the fall-back-to-memory path.
+
         Args:
             path: Directory to persist the cache to. ``None`` (the default)
-                uses ``DEFAULT_DISK_CACHE_PATH``. Existing entries at the path
-                become available as cache hits.
+                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
+                the path become available as cache hits.
             max_bytes: Byte cap on the disk tier. ``None`` uses
-                ``DEFAULT_DISK_CACHE_MAX_BYTES`` (4 GiB). When the bound is
-                reached, ``diskcache``'s LRU eviction takes over.
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
+                bound is reached, ``diskcache``'s LRU eviction takes over.
 
         Example:
-            client.channels.enable_data_cache_disk()
             client.channels.enable_data_cache_disk(path="/data/sift-cache")
             client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
         """
diff --git a/python/pyproject.toml b/python/pyproject.toml
index b12c29cb0..dfe94c043 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -252,6 +252,13 @@ dev-all = ["development", "all", "build"]
 docs-build = ["dev-all", "docs"] # Note python 3.9+
 
 [tool.uv]
+# Pin uv to a version that writes lockfile revision 3 (introduced in uv 0.8.4
+# by astral-sh/uv#14489, which added ``exclude-newer-package`` to the lock
+# schema). Older uv silently rolls the lockfile back to revision 2 on the
+# next ``uv lock`` / ``uv sync`` (a no-op-looking change), then a teammate
+# on a newer uv re-bumps it — churning the revision field in PRs.
+# ``required-version`` blocks the older uv up front with a clear error.
+required-version = ">=0.8.4"
 # Fork resolution per Python minor in the support range. Each fork resolves
 # independently, which lets 3.8 pick numpy 1.24.x + rosbags 0.9.23 without
 # being constrained by the 3.9+ universe (numpy 2.0 drops 3.8).
@@ -351,6 +358,7 @@ ignore_errors = true
 [[tool.mypy.overrides]]
 module = "nptdms"
 ignore_missing_imports = true
+ignore_errors = true
 
 # diskcache ships without inline type hints or PEP 561 marker. Used by the
 # channel data cache's optional on-disk tier.
diff --git a/python/uv.lock b/python/uv.lock
index 43c24b552..7a0c68645 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.8"
 resolution-markers = [
     "python_full_version >= '3.8.2' and python_full_version < '3.9'",

From 3c6a2ca149a5ddd8fd2034051d3741310da06427 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 14:03:49 -0700
Subject: [PATCH 10/14] handle single large fetch

---
 .../_internal/low_level_wrappers/data.py      |  54 +++++-
 .../_internal/low_level_wrappers/test_data.py | 159 +++++++++++++++++-
 2 files changed, 203 insertions(+), 10 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index ae0bbf6e5..e0bcf67a3 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -125,6 +125,13 @@ def __init__(
         self._entries: OrderedDict[str, ChannelCacheEntry] = OrderedDict()
         self._total_bytes: int = 0
         self._max_bytes: int = max_bytes
+        # Channels we've already logged an "entry exceeds tier cap" warning
+        # for. The check on the put path would otherwise spam the log once
+        # per ``get_data`` call for any channel whose typical entry is bigger
+        # than the cap. A successful normal put for the same channel clears
+        # the bit so a future regression re-warns.
+        self._oversized_memory_warned: set[str] = set()
+        self._oversized_disk_warned: set[str] = set()
         self._disk: diskcache.Cache | None = None
         self._disk_path: str | None = None
         self._disk_max_bytes: int | None = None
@@ -307,8 +314,30 @@ def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
         if self.enabled:
             self._put_memory(channel_id, entry)
         if self._disk is not None:
+            if (
+                self._disk_max_bytes is not None
+                and entry.size_bytes > self._disk_max_bytes
+            ):
+                if channel_id not in self._oversized_disk_warned:
+                    logger.warning(
+                        "Channel %s data (%d bytes) is larger than the disk "
+                        "cache cap (%d bytes); skipping disk cache for this "
+                        "channel so other entries aren't evicted. Raise the "
+                        "cap via ``client.channels.enable_data_cache_disk("
+                        "max_bytes=...)`` to cache this channel on disk.",
+                        channel_id,
+                        entry.size_bytes,
+                        self._disk_max_bytes,
+                    )
+                    self._oversized_disk_warned.add(channel_id)
+                try:
+                    self._disk.delete(channel_id, retry=True)
+                except Exception:
+                    pass
+                return
             try:
                 self._disk.set(channel_id, entry, retry=True)
+                self._oversized_disk_warned.discard(channel_id)
             except Exception:
                 # Best-effort persistence: keep going on disk errors so the
                 # in-memory cache (and the user's ``get_data`` call) still
@@ -323,6 +352,11 @@ def invalidate(self, channel_id: str) -> None:
         prior = self._entries.pop(channel_id, None)
         if prior is not None:
             self._total_bytes -= prior.size_bytes
+        # Invalidation is a fresh start for this channel; if it was warned
+        # about as oversized previously, the next put should re-evaluate
+        # against the current cap and re-warn if still too big.
+        self._oversized_memory_warned.discard(channel_id)
+        self._oversized_disk_warned.discard(channel_id)
         if self._disk is not None:
             try:
                 self._disk.delete(channel_id, retry=True)
@@ -332,6 +366,8 @@ def invalidate(self, channel_id: str) -> None:
     def clear(self) -> None:
         self._entries.clear()
         self._total_bytes = 0
+        self._oversized_memory_warned.clear()
+        self._oversized_disk_warned.clear()
         if self._disk is not None:
             self._disk.clear()
 
@@ -340,10 +376,26 @@ def close(self) -> None:
         self._close_disk()
 
     def _put_memory(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Memory-tier insert + eviction. Caller has already gated on ``enabled``."""
+        """Memory-tier insert + eviction. Caller has already gated on ``enabled``.
+        """
         prior = self._entries.pop(channel_id, None)
         if prior is not None:
             self._total_bytes -= prior.size_bytes
+        if entry.size_bytes > self._max_bytes:
+            if channel_id not in self._oversized_memory_warned:
+                logger.warning(
+                    "Channel %s data (%d bytes) is larger than the in-memory "
+                    "cache cap (%d bytes); skipping cache for this channel so "
+                    "other entries aren't evicted. Raise the cap via "
+                    "``client.channels.configure_data_cache(max_bytes=...)`` "
+                    "to cache this channel.",
+                    channel_id,
+                    entry.size_bytes,
+                    self._max_bytes,
+                )
+                self._oversized_memory_warned.add(channel_id)
+            return
+        self._oversized_memory_warned.discard(channel_id)
         self._entries[channel_id] = entry
         self._total_bytes += entry.size_bytes
         self._evict_until_under_bound()
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index 0ace402d2..5ef05b9d7 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -22,6 +22,7 @@
 
 from __future__ import annotations
 
+import logging
 from contextlib import contextmanager
 from datetime import datetime, timedelta, timezone
 from typing import Any, Iterator
@@ -102,6 +103,31 @@ def _invariant_holds(cache: ChannelCache) -> bool:
     return cache.total_bytes == sum(e.size_bytes for e in cache._entries.values())
 
 
+@contextmanager
+def _capture_data_warnings() -> Iterator[list[logging.LogRecord]]:
+    """Capture warnings emitted by the ``data`` module's logger directly.
+
+    Pytest's ``caplog`` reads from the root logger, but the Sift pytest plugin
+    sets ``propagate=False`` on the ``sift_client`` logger when audit logging
+    is active, so records emitted from any descendant don't reach the root.
+    Attaching a list-backed handler at the leaf logger bypasses that and
+    surfaces exactly the records we emit.
+    """
+    target = logging.getLogger("sift_client._internal.low_level_wrappers.data")
+    records: list[logging.LogRecord] = []
+
+    class _ListHandler(logging.Handler):
+        def emit(self, record: logging.LogRecord) -> None:
+            records.append(record)
+
+    handler = _ListHandler(level=logging.WARNING)
+    target.addHandler(handler)
+    try:
+        yield records
+    finally:
+        target.removeHandler(handler)
+
+
 def _patch_deserializer(sentinel_to_frames: dict[str, dict[str, pd.DataFrame]]) -> Any:
     """Patch ``try_deserialize_channel_data`` to translate string sentinels.
 
@@ -236,25 +262,101 @@ def test_get_promotes_to_most_recent(self) -> None:
         assert "c" in cache
         assert _invariant_holds(cache)
 
-    def test_oversized_entry_evicts_with_neighbours(self) -> None:
-        """A single entry larger than the cap ends up evicted itself.
+    def test_oversized_entry_skips_cache_preserves_neighbours(self) -> None:
+        """A single entry larger than the cap is rejected without evicting peers.
 
-        The alternative ("keep the oversized entry and accept that the cap
-        is soft") would silently reintroduce unbounded growth for any
-        workload whose typical entry is bigger than ``max_bytes``.
+        Before this guard, ``_put_memory`` would insert the oversized entry,
+        then loop popping LRU until the cap was satisfied — but since no
+        amount of eviction makes an oversized entry fit, the loop drained
+        every other entry *and* the oversized one, wiping the cache on every
+        fetch of that channel. The fix: detect the oversized case up front,
+        warn, and skip the insert.
         """
         small_a, small_b, oversized = _entry(rows=10), _entry(rows=10), _entry(rows=10_000)
         cache = ChannelCache(max_bytes=small_a.size_bytes + small_b.size_bytes)
         cache.put("a", small_a)
         cache.put("b", small_b)
-        cache.put("huge", oversized)
+        with _capture_data_warnings() as records:
+            cache.put("huge", oversized)
         assert "huge" not in cache
-        # Every other entry was evicted in the failed attempt to make room.
-        assert "a" not in cache
-        assert "b" not in cache
+        # Critical: the previously cached entries survive.
+        assert "a" in cache
+        assert "b" in cache
+        assert cache.total_bytes == small_a.size_bytes + small_b.size_bytes
+        assert _invariant_holds(cache)
+        # User gets a clear, actionable warning.
+        assert any("larger than the in-memory cache cap" in r.getMessage() for r in records)
+
+    def test_oversized_put_drops_prior_entry(self) -> None:
+        """An oversized re-insert must drop the prior slice, not silently keep it.
+
+        Otherwise a stale subrange would masquerade as a hit on the next
+        ``get`` even though the caller's intent was to refresh the entry.
+        """
+        small, oversized = _entry(rows=10), _entry(rows=10_000)
+        cache = ChannelCache(max_bytes=small.size_bytes)
+        cache.put("chan", small)
+        assert "chan" in cache
+        cache.put("chan", oversized)
+        assert "chan" not in cache
         assert cache.total_bytes == 0
         assert _invariant_holds(cache)
 
+    def test_oversized_put_warns_once_per_channel(self) -> None:
+        """Repeated oversized puts for the same channel log once, not on every call.
+
+        Without dedup, every ``get_data`` for an oversized channel would
+        write a fresh WARNING line — quickly drowning out other signal in
+        the logs.
+        """
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
+        with _capture_data_warnings() as records:
+            for _ in range(5):
+                cache.put("chan", oversized)
+        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
+        assert len(warnings) == 1
+
+    def test_oversized_warning_resets_after_normal_put(self) -> None:
+        """A successful normal-sized put clears the dedup bit.
+
+        Used by callers who narrow a time window after seeing the warning:
+        the next oversized regression should re-warn rather than stay silent.
+        """
+        oversized = _entry(rows=10_000)
+        small = _entry(rows=10)
+        cache = ChannelCache(max_bytes=small.size_bytes * 2)
+        with _capture_data_warnings() as records:
+            cache.put("chan", oversized)  # 1st warning
+            cache.put("chan", small)  # resets state
+            cache.put("chan", oversized)  # 2nd warning
+        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
+        assert len(warnings) == 2
+
+    def test_invalidate_resets_oversized_warning(self) -> None:
+        """``invalidate`` is a fresh start; the next oversized put re-warns."""
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
+        with _capture_data_warnings() as records:
+            cache.put("chan", oversized)
+            cache.invalidate("chan")
+            cache.put("chan", oversized)
+        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
+        assert len(warnings) == 2
+
+    def test_clear_resets_oversized_warning(self) -> None:
+        """``clear`` resets all dedup state across channels."""
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
+        with _capture_data_warnings() as records:
+            cache.put("chan-a", oversized)
+            cache.put("chan-b", oversized)
+            cache.clear()
+            cache.put("chan-a", oversized)
+            cache.put("chan-b", oversized)
+        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
+        assert len(warnings) == 4
+
     def test_max_bytes_zero_disables_cache(self) -> None:
         cache = ChannelCache(max_bytes=0)
         cache.put("c1", _entry(rows=100))
@@ -449,6 +551,45 @@ def test_clear_wipes_both_tiers(self, tmp_path) -> None:
         finally:
             cache.close()
 
+    def test_oversized_entry_skips_disk_preserves_other_entries(self, tmp_path) -> None:
+        """An entry larger than the disk cap is skipped on disk too.
+
+        Without the guard, ``diskcache``'s cull() would evict every other
+        on-disk row trying to fit an unfittable entry, then drop the entry
+        itself — the same wipe-everything failure mode as the memory tier.
+
+        Memory is sized at twice the oversized entry, so the memory tier
+        accepts it and only the disk-tier guard is exercised. We assert on
+        the ``_disk`` mapping directly because that's where the contested
+        behavior lives.
+        """
+        small = _entry(rows=4)
+        oversized = _entry(rows=10_000)
+        # ``disk_max_bytes`` has to leave room for ``diskcache``'s pickle
+        # envelope around each small entry (a few KB) AND be small enough
+        # that the oversized entry trips the guard. Half the oversized
+        # DataFrame's raw byte size hits both constraints comfortably.
+        cache = ChannelCache(
+            max_bytes=oversized.size_bytes * 2,
+            disk_path=tmp_path / "disk-oversize",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
+        try:
+            cache.put("small-1", small)
+            cache.put("small-2", small)
+            assert cache._disk is not None
+            with _capture_data_warnings() as records:
+                cache.put("huge", oversized)
+            # Disk-side prior entries survive; oversized one was not written.
+            assert "small-1" in cache._disk
+            assert "small-2" in cache._disk
+            assert "huge" not in cache._disk
+            assert any(
+                "larger than the disk cache cap" in r.getMessage() for r in records
+            )
+        finally:
+            cache.close()
+
     def test_disable_disk_preserves_memory(self, tmp_path) -> None:
         """Turning off disk closes the handle but keeps memory intact."""
         cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "disable")

From 814280c2bb8d29c2507fc7e6fea67bbd33d4d376 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 14:16:56 -0700
Subject: [PATCH 11/14] fmt

---
 .../lib/sift_client/_internal/low_level_wrappers/data.py  | 8 ++------
 .../_tests/_internal/low_level_wrappers/test_data.py      | 4 +---
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index e0bcf67a3..04f75dd47 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -314,10 +314,7 @@ def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
         if self.enabled:
             self._put_memory(channel_id, entry)
         if self._disk is not None:
-            if (
-                self._disk_max_bytes is not None
-                and entry.size_bytes > self._disk_max_bytes
-            ):
+            if self._disk_max_bytes is not None and entry.size_bytes > self._disk_max_bytes:
                 if channel_id not in self._oversized_disk_warned:
                     logger.warning(
                         "Channel %s data (%d bytes) is larger than the disk "
@@ -376,8 +373,7 @@ def close(self) -> None:
         self._close_disk()
 
     def _put_memory(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Memory-tier insert + eviction. Caller has already gated on ``enabled``.
-        """
+        """Memory-tier insert + eviction. Caller has already gated on ``enabled``."""
         prior = self._entries.pop(channel_id, None)
         if prior is not None:
             self._total_bytes -= prior.size_bytes
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index 5ef05b9d7..d06307a71 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -584,9 +584,7 @@ def test_oversized_entry_skips_disk_preserves_other_entries(self, tmp_path) -> N
             assert "small-1" in cache._disk
             assert "small-2" in cache._disk
             assert "huge" not in cache._disk
-            assert any(
-                "larger than the disk cache cap" in r.getMessage() for r in records
-            )
+            assert any("larger than the disk cache cap" in r.getMessage() for r in records)
         finally:
             cache.close()
 

From 63cc0452496721a1b50acf8f75fc09fbf932c868 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 15:18:45 -0700
Subject: [PATCH 12/14] Remove in memory cache layer.

---
 python/CHANGELOG.md                           |  29 +-
 .../_internal/low_level_wrappers/data.py      | 291 +++----
 .../_internal/low_level_wrappers/test_data.py | 731 ++++++++----------
 .../_tests/resources/test_channels.py         |  72 +-
 python/lib/sift_client/resources/channels.py  |  76 +-
 .../resources/sync_stubs/__init__.pyi         |  49 +-
 6 files changed, 464 insertions(+), 784 deletions(-)

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index e33995a8e..d3ffaeb65 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -11,31 +11,18 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 Up to a ~80x speedup for some get_data calls.
 
-#### Bounded channel data cache
+#### Channel data cache (opt-out, on by default)
 
-The in-memory channel data cache used by `client.channels.get_data(...)` is now byte-bounded with LRU eviction (default 512 MiB). Once the bound is reached, the least-recently-used cached channel is evicted.
+`client.channels.get_data(...)` now writes the channel windows it returns to an on-disk cache by default. Subsequent calls covering the same channel/time range — including from a fresh process — read straight out of the cache instead of going to the wire. This also bounds memory: the cache holds nothing in process memory after the call returns, which fixes the OOM seen on long sustained pulls (~5–7 GB of cache for a 145M-point pull in earlier versions).
 
-Configure the bound on the `channels` resource:
+The default location is `<tempfile.gettempdir()>/sift-channel-data-cache`, capped at 4 GiB with LRU eviction. If the default path can't be opened (read-only filesystem, restricted container, etc.), the client logs a warning and continues with caching disabled — `get_data` still works, it just always goes to the wire.
 
-```python
-client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)  # 128 MiB cap
-client.channels.configure_data_cache(max_bytes=0)                  # disable caching
-```
-
-`configure_data_cache` may be called at any time; if the cache is already populated, the new bound is applied immediately and excess entries are evicted.
-
-`ignore_cache=True` on `client.channels.get_data(...)` now also skips writing into the cache, matching its read-side bypass semantics. Previously a "non-caching" workload still appended to the shared cache on every call, which still caused increased memory usage.
-
-#### On-disk channel data cache (opt-out, on by default)
-
-The channel data cache now persists to disk by default, surviving process restarts. The disk tier is a second-chance layer beneath the in-memory cache: on a memory miss, `get_data` checks disk before going to the wire. Re-running the same workload in a new session picks up the previously-cached windows for free — no configuration required.
-
-The default location is `<tempfile.gettempdir()>/sift-channel-data-cache`, capped at 4 GiB with LRU eviction. If the default path can't be opened (read-only filesystem, restricted container, etc.), the client logs a warning and falls back to the in-memory cache only — `get_data` continues to work.
+`ignore_cache=True` on `client.channels.get_data(...)` now skips writing into the cache as well as reading from it. Previously a "non-caching" workload still appended to the shared cache on every call.
 
-Opt out, reconfigure, or wipe the on-disk cache from the `channels` resource:
+Opt out, reconfigure, or wipe the cache from the `channels` resource:
 
 ```python
-# Opt out — no data persisted to disk.
+# Opt out — no data persisted to disk; every get_data call goes to the wire.
 client.channels.disable_data_cache_disk()
 
 # Reconfigure the location or byte cap.
@@ -46,9 +33,9 @@ client.channels.clear_data_cache_on_disk()                   # default tmp path
 client.channels.clear_data_cache_on_disk("/data/sift-cache") # custom path
 ```
 
-`enable_data_cache_disk` is also the way to turn the tier back on after a prior `disable_data_cache_disk` call.
+`enable_data_cache_disk` is also the way to turn the cache back on after a prior `disable_data_cache_disk` call.
 
-The disk tier is powered by [`diskcache`](https://grantjenks.com/docs/diskcache/) (pure-Python, SQLite-backed) and has its own independent byte cap with LRU eviction. The in-memory tier remains the fast path — disk is only consulted on a memory miss.
+The cache is powered by [`diskcache`](https://grantjenks.com/docs/diskcache/) (pure-Python, SQLite-backed) with LRU eviction.
 
 #### Resource and principal attributes (ABAC)
 
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 04f75dd47..03ab29268 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -5,7 +5,6 @@
 import os
 import shutil
 import tempfile
-from collections import OrderedDict
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
@@ -41,11 +40,6 @@
 # has been resolved. In the mean time each channel gets its own request.
 REQUEST_BATCH_SIZE = 1
 
-# Default in-memory budget for cached channel DataFrames, per ``DataLowLevelClient``
-# instance. 512 MiB is well below typical limits while still letting common
-# interactive workloads stay in cache. Override via ``SiftClient(data_cache_max_bytes=...)``.
-DEFAULT_DATA_CACHE_MAX_BYTES = 512 * 1024 * 1024
-
 
 class ChannelCacheEntry(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -67,33 +61,37 @@ def _new_cache_entry(
 
 
 class ChannelCache:
-    """Two-tier cache of per-channel DataFrames.
-
-    Tier 1: an LRU-ordered, byte-bounded in-memory dict (hot path). ``max_bytes
-    <= 0`` disables this tier: ``get`` always misses memory, ``put`` doesn't
-    populate it.
-
-    Tier 2 (optional, see ``enable_disk``): a ``diskcache``-backed write-through
-    layer that survives process restarts. When enabled, ``put`` writes to both
-    tiers, ``get`` falls back to disk on a memory miss (promoting the hit back
-    into memory), and ``invalidate``/``clear`` cascade to disk. The disk tier
-    has its own byte cap that ``diskcache`` enforces with LRU eviction.
-
-    The two tiers are independent: setting ``max_bytes=0`` keeps the disk layer
-    active, useful for "cold storage only" workloads.
+    """Disk-backed cache of per-channel DataFrames.
+
+    A ``diskcache``-backed key/value store that survives process restarts.
+    ``put`` writes to disk, ``get`` reads from disk, and
+    ``invalidate``/``clear`` remove entries. The store has a byte cap
+    that ``diskcache`` enforces with its own LRU eviction.
+
+    When no ``disk_path`` is supplied the cache is a no-op: ``get`` always
+    returns ``None``, ``__contains__`` is always ``False``, and ``put`` is
+    silently dropped. This is the "caching disabled" mode used after a
+    :meth:`disable_disk` call (or when disk persistence is turned off on
+    the owning resource).
+
+    An in-memory tier previously sat in front of disk. It was removed once
+    benchmarks showed that, for the workloads driving the OOM regression,
+    its memory footprint cost more than it saved by skipping the per-call
+    pickle/deserialize step on a warm disk hit; if profiling later shows
+    disk reads dominating, re-introduce a small front cache here.
     """
 
-    #: Default directory for the on-disk tier. Lives under
-    #: ``tempfile.gettempdir()`` so it survives across sessions of the same
-    #: user but doesn't pollute the user's home dir. The suffix is fixed so
-    #: multiple processes (different ``SiftClient`` instances, notebooks, etc.)
-    #: naturally share the same store and can read each other's prior sessions.
+    #: Default directory for the cache. Lives under ``tempfile.gettempdir()``
+    #: so it survives across sessions of the same user but doesn't pollute
+    #: the user's home dir. The suffix is fixed so multiple processes
+    #: (different ``SiftClient`` instances, notebooks, etc.) naturally share
+    #: the same store and can read each other's prior sessions.
     DEFAULT_DISK_PATH: str = os.path.join(tempfile.gettempdir(), "sift-channel-data-cache")
 
-    #: Default byte cap for the disk tier when ``enable_disk`` is called
-    #: without an explicit ``max_bytes``. 4 GiB is a generous ceiling for the
-    #: typical ``/tmp`` filesystem; ``diskcache`` enforces it with its own
-    #: SQLite-backed LRU eviction once the bound is reached.
+    #: Default byte cap for the cache when ``enable_disk`` is called without
+    #: an explicit ``max_bytes``. 4 GiB is a generous ceiling for the typical
+    #: ``/tmp`` filesystem; ``diskcache`` enforces it with its own SQLite-
+    #: backed LRU eviction once the bound is reached.
     DEFAULT_DISK_MAX_BYTES: int = 4 * 1024 * 1024 * 1024
 
     #: Marker file ``diskcache`` writes inside every cache directory. We
@@ -103,34 +101,27 @@ class ChannelCache:
 
     def __init__(
         self,
-        max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES,
         *,
         disk_path: str | os.PathLike[str] | None = None,
         disk_max_bytes: int | None = None,
     ):
-        """Construct an in-memory cache, optionally backed by disk.
+        """Construct a disk-backed cache.
 
         Args:
-            max_bytes: Byte cap on the in-memory tier. ``0`` disables it.
-            disk_path: Directory for the disk tier. ``None`` (the default)
-                disables disk. A previously-populated directory is reused,
-                so subsequent sessions can read from existing entries.
-            disk_max_bytes: Byte cap on the disk tier. ``None`` falls back to
+            disk_path: Directory for the cache. ``None`` disables caching
+                entirely (every operation becomes a no-op). A previously-
+                populated directory is reused, so subsequent sessions can
+                read existing entries.
+            disk_max_bytes: Byte cap on disk usage. ``None`` falls back to
                 ``DEFAULT_DISK_MAX_BYTES``. Ignored when ``disk_path`` is
                 ``None``.
         """
-        if max_bytes < 0:
-            raise ValueError(f"data_cache_max_bytes must be >= 0, got {max_bytes}")
         self.name_id_map: dict[str, str] = {}
-        self._entries: OrderedDict[str, ChannelCacheEntry] = OrderedDict()
-        self._total_bytes: int = 0
-        self._max_bytes: int = max_bytes
-        # Channels we've already logged an "entry exceeds tier cap" warning
+        # Channels we've already logged an "entry exceeds disk cap" warning
         # for. The check on the put path would otherwise spam the log once
         # per ``get_data`` call for any channel whose typical entry is bigger
         # than the cap. A successful normal put for the same channel clears
         # the bit so a future regression re-warns.
-        self._oversized_memory_warned: set[str] = set()
         self._oversized_disk_warned: set[str] = set()
         self._disk: diskcache.Cache | None = None
         self._disk_path: str | None = None
@@ -171,64 +162,31 @@ def clear_disk(cls, path: str | os.PathLike[str] | None = None) -> None:
             )
         shutil.rmtree(target)
 
-    @property
-    def enabled(self) -> bool:
-        """Whether the in-memory tier accepts writes (``max_bytes > 0``)."""
-        return self._max_bytes > 0
-
-    @property
-    def max_bytes(self) -> int:
-        return self._max_bytes
-
-    @max_bytes.setter
-    def max_bytes(self, value: int) -> None:
-        """Reconfigure the in-memory byte cap and immediately evict any excess.
-
-        Used by ``ChannelsAPIAsync.configure_data_cache`` to retune a live
-        cache. Lowering the cap below ``total_bytes`` triggers LRU eviction
-        in the same loop ``put`` uses, so the invariant ``total_bytes <=
-        max_bytes`` is restored before the setter returns. Does not touch
-        the disk tier.
-        """
-        if value < 0:
-            raise ValueError(f"data_cache_max_bytes must be >= 0, got {value}")
-        self._max_bytes = value
-        self._evict_until_under_bound()
-
-    @property
-    def total_bytes(self) -> int:
-        return self._total_bytes
-
     @property
     def disk_enabled(self) -> bool:
-        """Whether the disk-backed second-chance tier is currently open."""
+        """Whether the disk-backed store is currently open."""
         return self._disk is not None
 
     @property
     def disk_path(self) -> str | None:
-        """Filesystem path of the disk tier when enabled, else ``None``."""
+        """Filesystem path of the cache when enabled, else ``None``."""
         return self._disk_path
 
     @property
     def disk_max_bytes(self) -> int | None:
-        """Configured byte cap on the disk tier, or ``None`` when disabled."""
+        """Configured byte cap on disk usage, or ``None`` when disabled."""
         return self._disk_max_bytes
 
-    def __len__(self) -> int:
-        return len(self._entries)
-
     def __contains__(self, channel_id: str) -> bool:
-        """True if the channel is cached in memory OR on disk.
+        """True if the channel is cached on disk.
 
         Used by ``_filter_cached_channels`` to decide whether ``get_data``
-        needs to hit the wire. Including the disk tier here lets a fresh
-        session served by a warm disk avoid re-fetching.
+        needs to hit the wire. A warm disk lets a fresh session avoid
+        re-fetching previously-served windows.
         """
-        if channel_id in self._entries:
-            return True
-        if self._disk is not None and channel_id in self._disk:
-            return True
-        return False
+        if self._disk is None:
+            return False
+        return channel_id in self._disk
 
     def enable_disk(
         self,
@@ -236,19 +194,19 @@ def enable_disk(
         path: str | os.PathLike[str] | None = None,
         max_bytes: int | None = None,
     ) -> None:
-        """Enable (or reconfigure) the disk-backed second-chance tier.
+        """Enable (or reconfigure) the disk-backed cache.
 
-        If a previous disk tier was open at a different path or with a
-        different size cap, it's closed first. Memory contents are left
-        intact; they are NOT replayed to disk so disk reflects only future
-        writes.
+        If a previous disk handle was open at a different path or with a
+        different size cap, it's closed first. Entries stored at the old
+        path are NOT copied to the new one; the new location serves only
+        what it already holds plus future writes.
 
         Args:
             path: Directory to persist to. ``None`` uses
                 :attr:`DEFAULT_DISK_PATH`. The directory is created if
                 missing; an existing one is opened in place and its
                 contents become available to ``get``.
-            max_bytes: Byte cap for the disk tier (``None`` →
+            max_bytes: Byte cap on disk usage (``None`` →
                 :attr:`DEFAULT_DISK_MAX_BYTES`).
         """
         target_path = str(path) if path is not None else self.DEFAULT_DISK_PATH
@@ -263,24 +221,15 @@ def enable_disk(
         self._open_disk(target_path, target_max)
 
     def disable_disk(self) -> None:
-        """Close the disk tier (if open). Does not touch the disk contents.
+        """Close the disk handle (if open). Does not touch the disk contents.
 
-        Use ``sift_client.clear_data_cache_on_disk(path)`` to remove a
+        Use ``client.channels.clear_data_cache_on_disk(path)`` to remove a
         directory from disk.
         """
         self._close_disk()
 
     def get(self, channel_id: str) -> ChannelCacheEntry | None:
-        """Return the entry for ``channel_id`` if cached, otherwise None.
-
-        Memory is consulted first; on a miss, the disk tier (if enabled) is
-        checked. A disk hit is promoted back into memory (subject to the
-        in-memory cap) so subsequent accesses stay hot.
-        """
-        entry = self._entries.get(channel_id)
-        if entry is not None:
-            self._entries.move_to_end(channel_id)
-            return entry
+        """Return the entry for ``channel_id`` if cached, otherwise None."""
         if self._disk is None:
             return None
         try:
@@ -288,8 +237,8 @@ def get(self, channel_id: str) -> ChannelCacheEntry | None:
         except Exception:
             # diskcache surfaces ``sqlite3.DatabaseError`` (and friends) for
             # corrupt or partially-written entries from a prior session.
-            # Treat as a miss; force ``invalidate`` to drop the bad row so
-            # we don't repeatedly trip the same path.
+            # Treat as a miss; force-drop the bad row so we don't repeatedly
+            # trip the same path.
             logger.warning("disk cache read failed for %s; invalidating", channel_id)
             try:
                 del self._disk[channel_id]
@@ -298,61 +247,55 @@ def get(self, channel_id: str) -> ChannelCacheEntry | None:
             return None
         if disk_entry is None or not isinstance(disk_entry, ChannelCacheEntry):
             return None
-        if self.enabled:
-            # Promote disk hit into memory so subsequent reads are cheap.
-            self._put_memory(channel_id, disk_entry)
         return disk_entry
 
     def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Insert or replace ``channel_id`` in memory (if enabled) and on disk.
+        """Insert or replace ``channel_id`` on disk.
 
-        Memory reclaims any prior entry's byte count BEFORE adding the new
-        one's, so a re-insert (e.g. concat-merge of fresh data into an
-        existing entry) accounts for the size delta correctly. Disk writes
-        replace the prior row.
+        No-op when the disk cache is disabled. Entries larger than
+        ``disk_max_bytes`` are skipped (with a one-shot warning per
+        channel) instead of being inserted, since diskcache's eviction
+        loop would otherwise drain every other row trying — and failing —
+        to fit them.
         """
-        if self.enabled:
-            self._put_memory(channel_id, entry)
-        if self._disk is not None:
-            if self._disk_max_bytes is not None and entry.size_bytes > self._disk_max_bytes:
-                if channel_id not in self._oversized_disk_warned:
-                    logger.warning(
-                        "Channel %s data (%d bytes) is larger than the disk "
-                        "cache cap (%d bytes); skipping disk cache for this "
-                        "channel so other entries aren't evicted. Raise the "
-                        "cap via ``client.channels.enable_data_cache_disk("
-                        "max_bytes=...)`` to cache this channel on disk.",
-                        channel_id,
-                        entry.size_bytes,
-                        self._disk_max_bytes,
-                    )
-                    self._oversized_disk_warned.add(channel_id)
-                try:
-                    self._disk.delete(channel_id, retry=True)
-                except Exception:
-                    pass
-                return
+        if self._disk is None:
+            return
+        if self._disk_max_bytes is not None and entry.size_bytes > self._disk_max_bytes:
+            if channel_id not in self._oversized_disk_warned:
+                logger.warning(
+                    "Channel %s data (%d bytes) is larger than the disk "
+                    "cache cap (%d bytes); skipping disk cache for this "
+                    "channel so other entries aren't evicted. Raise the "
+                    "cap via ``client.channels.enable_data_cache_disk("
+                    "max_bytes=...)`` to cache this channel on disk.",
+                    channel_id,
+                    entry.size_bytes,
+                    self._disk_max_bytes,
+                )
+                self._oversized_disk_warned.add(channel_id)
             try:
-                self._disk.set(channel_id, entry, retry=True)
-                self._oversized_disk_warned.discard(channel_id)
+                self._disk.delete(channel_id, retry=True)
             except Exception:
-                # Best-effort persistence: keep going on disk errors so the
-                # in-memory cache (and the user's ``get_data`` call) still
-                # succeeds. Drop the (possibly partial) disk row.
-                logger.warning("disk cache write failed for %s; invalidating", channel_id)
-                try:
-                    self._disk.delete(channel_id, retry=True)
-                except Exception:
-                    pass
+                pass
+            return
+        try:
+            self._disk.set(channel_id, entry, retry=True)
+            self._oversized_disk_warned.discard(channel_id)
+        except Exception:
+            # Best-effort persistence: keep going on disk errors so the
+            # user's ``get_data`` call still succeeds. Drop the (possibly
+            # partial) disk row.
+            logger.warning("disk cache write failed for %s; invalidating", channel_id)
+            try:
+                self._disk.delete(channel_id, retry=True)
+            except Exception:
+                pass
 
     def invalidate(self, channel_id: str) -> None:
-        prior = self._entries.pop(channel_id, None)
-        if prior is not None:
-            self._total_bytes -= prior.size_bytes
+        """Remove ``channel_id`` from the cache. Safe to call when absent."""
         # Invalidation is a fresh start for this channel; if it was warned
         # about as oversized previously, the next put should re-evaluate
         # against the current cap and re-warn if still too big.
-        self._oversized_memory_warned.discard(channel_id)
         self._oversized_disk_warned.discard(channel_id)
         if self._disk is not None:
             try:
@@ -361,55 +304,19 @@ def invalidate(self, channel_id: str) -> None:
                 pass
 
     def clear(self) -> None:
-        self._entries.clear()
-        self._total_bytes = 0
-        self._oversized_memory_warned.clear()
+        """Wipe all entries from disk. The directory itself remains."""
         self._oversized_disk_warned.clear()
         if self._disk is not None:
             self._disk.clear()
 
     def close(self) -> None:
-        """Release the disk-tier file handle. Safe to call without disk enabled."""
+        """Release the disk file handle. Safe to call without disk enabled."""
         self._close_disk()
 
-    def _put_memory(self, channel_id: str, entry: ChannelCacheEntry) -> None:
-        """Memory-tier insert + eviction. Caller has already gated on ``enabled``."""
-        prior = self._entries.pop(channel_id, None)
-        if prior is not None:
-            self._total_bytes -= prior.size_bytes
-        if entry.size_bytes > self._max_bytes:
-            if channel_id not in self._oversized_memory_warned:
-                logger.warning(
-                    "Channel %s data (%d bytes) is larger than the in-memory "
-                    "cache cap (%d bytes); skipping cache for this channel so "
-                    "other entries aren't evicted. Raise the cap via "
-                    "``client.channels.configure_data_cache(max_bytes=...)`` "
-                    "to cache this channel.",
-                    channel_id,
-                    entry.size_bytes,
-                    self._max_bytes,
-                )
-                self._oversized_memory_warned.add(channel_id)
-            return
-        self._oversized_memory_warned.discard(channel_id)
-        self._entries[channel_id] = entry
-        self._total_bytes += entry.size_bytes
-        self._evict_until_under_bound()
-
-    def _evict_until_under_bound(self) -> None:
-        # ``popitem(last=False)`` drops the oldest entry. A single fresh entry
-        # whose ``size_bytes`` alone exceeds ``max_bytes`` ends up evicted on
-        # the final iteration.
-        while self._entries and self._total_bytes > self._max_bytes:
-            _, dropped = self._entries.popitem(last=False)
-            self._total_bytes -= dropped.size_bytes
-
     def _open_disk(self, path: str, max_bytes: int) -> None:
         import diskcache
 
         os.makedirs(path, exist_ok=True)
-        # ``least-recently-used`` matches the in-memory tier's eviction policy;
-        # statistics/tag_index are off because we only need plain k/v reads.
         self._disk = diskcache.Cache(
             directory=path,
             size_limit=max_bytes,
@@ -442,7 +349,6 @@ def __init__(
         self,
         grpc_client: GrpcClient,
         *,
-        data_cache_max_bytes: int = DEFAULT_DATA_CACHE_MAX_BYTES,
         disk_cache_path: str | os.PathLike[str] | None = None,
         disk_cache_max_bytes: int | None = None,
     ):
@@ -450,17 +356,14 @@ def __init__(
 
         Args:
             grpc_client: The gRPC client to use for making API calls.
-            data_cache_max_bytes: Cap on the in-memory channel-data cache (bytes).
-                Set to ``0`` to disable in-memory caching. See ``ChannelCache``.
-            disk_cache_path: Directory for the disk-backed second-chance tier.
-                ``None`` disables disk persistence. See ``ChannelCache``.
-            disk_cache_max_bytes: Byte cap for the disk tier. ``None`` uses
-                ``DEFAULT_DISK_CACHE_MAX_BYTES``. Ignored when
+            disk_cache_path: Directory for the disk-backed channel-data cache.
+                ``None`` disables caching entirely. See ``ChannelCache``.
+            disk_cache_max_bytes: Byte cap for disk usage. ``None`` uses
+                ``ChannelCache.DEFAULT_DISK_MAX_BYTES``. Ignored when
                 ``disk_cache_path`` is ``None``.
         """
         super().__init__(grpc_client)
         self.channel_cache = ChannelCache(
-            max_bytes=data_cache_max_bytes,
             disk_path=disk_cache_path,
             disk_max_bytes=disk_cache_max_bytes,
         )
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index d06307a71..4fc094440 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -1,11 +1,10 @@
 """Tests for :mod:`sift_client._internal.low_level_wrappers.data`.
 
-Five classes, narrowest scope first:
+Four classes, narrowest scope first:
 
-* :class:`TestChannelCache` — pure ``ChannelCache`` unit tests (byte
-  accounting, LRU promotion, eviction).
-* :class:`TestChannelCacheDisk` — disk-backed second-chance tier
-  (fresh open, cross-session reload, fall-through reads, disable).
+* :class:`TestChannelCache` — disk-backed :class:`ChannelCache` unit tests
+  (fresh open, cross-session reload, invalidate/clear, oversized guards,
+  disable/reconfigure).
 * :class:`TestChannelCacheClearDisk` — ``ChannelCache.clear_disk``
   classmethod (default path, custom path, safety guard).
 * :class:`TestMergePages` — ``DataLowLevelClient._merge_pages``, the
@@ -32,7 +31,6 @@
 import pytest
 
 from sift_client._internal.low_level_wrappers.data import (
-    DEFAULT_DATA_CACHE_MAX_BYTES,
     ChannelCache,
     ChannelCacheEntry,
     DataLowLevelClient,
@@ -98,9 +96,15 @@ def _channel(cid: str) -> Channel:
     )
 
 
-def _invariant_holds(cache: ChannelCache) -> bool:
-    """``total_bytes`` must equal the sum of per-entry sizes at all times."""
-    return cache.total_bytes == sum(e.size_bytes for e in cache._entries.values())
+def _client_with_cache(tmp_path, subdir: str = "cache") -> DataLowLevelClient:
+    """Build a ``DataLowLevelClient`` whose ``ChannelCache`` points at ``tmp_path``.
+
+    Tests that exercise cache behaviour (hits/misses/eviction) need an
+    actual disk-backed cache, so ``disk_cache_path`` must be supplied. A
+    plain ``DataLowLevelClient(MagicMock())`` defaults to no-cache mode
+    and would silently turn every cache test into a wire-path test.
+    """
+    return DataLowLevelClient(MagicMock(), disk_cache_path=tmp_path / subdir)
 
 
 @contextmanager
@@ -198,266 +202,55 @@ async def fake_impl(
 
 
 class TestChannelCache:
-    """Byte accounting, LRU promotion, eviction."""
-
-    def test_put_get_roundtrip_and_size_replacement(self) -> None:
-        """First put records size; second put on same key replaces it.
-
-        Without size reclamation on the second put, ``total_bytes`` would
-        double-count and trip the eviction loop on the next insert.
-        """
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        small, big = _entry(rows=10), _entry(rows=1000)
-        cache.put("c1", small)
-        assert cache.get("c1") is small
-        assert cache.total_bytes == small.size_bytes
-        cache.put("c1", big)
-        assert cache.get("c1") is big
-        assert cache.total_bytes == big.size_bytes  # not small + big
-        assert _invariant_holds(cache)
-
-    def test_invalidate(self) -> None:
-        """Removes a present entry and decrements bytes; no-op for missing keys."""
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        cache.invalidate("never_added")  # safe before any puts
-        assert cache.total_bytes == 0
-        cache.put("c1", _entry(rows=10))
-        cache.invalidate("c1")
-        assert cache.get("c1") is None
-        assert cache.total_bytes == 0
-        assert _invariant_holds(cache)
-
-    def test_clear(self) -> None:
-        cache = ChannelCache(max_bytes=DEFAULT_DATA_CACHE_MAX_BYTES)
-        cache.put("c1", _entry(rows=10))
-        cache.put("c2", _entry(rows=20))
-        cache.clear()
-        assert cache.total_bytes == 0
-        assert len(cache) == 0
-        assert _invariant_holds(cache)
-
-    def test_oldest_entry_evicted_first(self) -> None:
-        """Insertion order determines eviction when only puts have happened."""
-        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
-        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes)  # room for two
-        cache.put("a", a)
-        cache.put("b", b)
-        cache.put("c", c)  # evicts "a"
-        assert "a" not in cache
-        assert "b" in cache
-        assert "c" in cache
-        assert cache.total_bytes <= a.size_bytes + b.size_bytes
-        assert _invariant_holds(cache)
-
-    def test_get_promotes_to_most_recent(self) -> None:
-        """Reading an entry must protect it from the next eviction."""
-        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
-        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes)
-        cache.put("a", a)
-        cache.put("b", b)
-        assert cache.get("a") is a  # promote a
-        cache.put("c", c)  # b is now oldest, gets evicted
-        assert "a" in cache
-        assert "b" not in cache
-        assert "c" in cache
-        assert _invariant_holds(cache)
-
-    def test_oversized_entry_skips_cache_preserves_neighbours(self) -> None:
-        """A single entry larger than the cap is rejected without evicting peers.
-
-        Before this guard, ``_put_memory`` would insert the oversized entry,
-        then loop popping LRU until the cap was satisfied — but since no
-        amount of eviction makes an oversized entry fit, the loop drained
-        every other entry *and* the oversized one, wiping the cache on every
-        fetch of that channel. The fix: detect the oversized case up front,
-        warn, and skip the insert.
-        """
-        small_a, small_b, oversized = _entry(rows=10), _entry(rows=10), _entry(rows=10_000)
-        cache = ChannelCache(max_bytes=small_a.size_bytes + small_b.size_bytes)
-        cache.put("a", small_a)
-        cache.put("b", small_b)
-        with _capture_data_warnings() as records:
-            cache.put("huge", oversized)
-        assert "huge" not in cache
-        # Critical: the previously cached entries survive.
-        assert "a" in cache
-        assert "b" in cache
-        assert cache.total_bytes == small_a.size_bytes + small_b.size_bytes
-        assert _invariant_holds(cache)
-        # User gets a clear, actionable warning.
-        assert any("larger than the in-memory cache cap" in r.getMessage() for r in records)
-
-    def test_oversized_put_drops_prior_entry(self) -> None:
-        """An oversized re-insert must drop the prior slice, not silently keep it.
-
-        Otherwise a stale subrange would masquerade as a hit on the next
-        ``get`` even though the caller's intent was to refresh the entry.
-        """
-        small, oversized = _entry(rows=10), _entry(rows=10_000)
-        cache = ChannelCache(max_bytes=small.size_bytes)
-        cache.put("chan", small)
-        assert "chan" in cache
-        cache.put("chan", oversized)
-        assert "chan" not in cache
-        assert cache.total_bytes == 0
-        assert _invariant_holds(cache)
-
-    def test_oversized_put_warns_once_per_channel(self) -> None:
-        """Repeated oversized puts for the same channel log once, not on every call.
-
-        Without dedup, every ``get_data`` for an oversized channel would
-        write a fresh WARNING line — quickly drowning out other signal in
-        the logs.
-        """
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
-        with _capture_data_warnings() as records:
-            for _ in range(5):
-                cache.put("chan", oversized)
-        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
-        assert len(warnings) == 1
-
-    def test_oversized_warning_resets_after_normal_put(self) -> None:
-        """A successful normal-sized put clears the dedup bit.
-
-        Used by callers who narrow a time window after seeing the warning:
-        the next oversized regression should re-warn rather than stay silent.
-        """
-        oversized = _entry(rows=10_000)
-        small = _entry(rows=10)
-        cache = ChannelCache(max_bytes=small.size_bytes * 2)
-        with _capture_data_warnings() as records:
-            cache.put("chan", oversized)  # 1st warning
-            cache.put("chan", small)  # resets state
-            cache.put("chan", oversized)  # 2nd warning
-        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
-        assert len(warnings) == 2
-
-    def test_invalidate_resets_oversized_warning(self) -> None:
-        """``invalidate`` is a fresh start; the next oversized put re-warns."""
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
-        with _capture_data_warnings() as records:
-            cache.put("chan", oversized)
-            cache.invalidate("chan")
-            cache.put("chan", oversized)
-        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
-        assert len(warnings) == 2
-
-    def test_clear_resets_oversized_warning(self) -> None:
-        """``clear`` resets all dedup state across channels."""
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(max_bytes=oversized.size_bytes // 4)
-        with _capture_data_warnings() as records:
-            cache.put("chan-a", oversized)
-            cache.put("chan-b", oversized)
-            cache.clear()
-            cache.put("chan-a", oversized)
-            cache.put("chan-b", oversized)
-        warnings = [r for r in records if "larger than the in-memory cache cap" in r.getMessage()]
-        assert len(warnings) == 4
-
-    def test_max_bytes_zero_disables_cache(self) -> None:
-        cache = ChannelCache(max_bytes=0)
-        cache.put("c1", _entry(rows=100))
-        assert not cache.enabled
-        assert cache.get("c1") is None
-        assert cache.total_bytes == 0
-        assert len(cache) == 0
-
-    def test_negative_max_bytes_raises(self) -> None:
-        with pytest.raises(ValueError, match="data_cache_max_bytes"):
-            ChannelCache(max_bytes=-1)
-
-    def test_set_max_bytes_lower_evicts_immediately(self) -> None:
-        """Lowering ``max_bytes`` below ``total_bytes`` evicts LRU until it fits.
-
-        Used by ``ChannelsAPIAsync.configure_data_cache`` to retune a live
-        cache without forcing the caller to call ``clear()`` first.
-        """
-        a, b, c = _entry(rows=50), _entry(rows=50), _entry(rows=50)
-        cache = ChannelCache(max_bytes=a.size_bytes + b.size_bytes + c.size_bytes)
-        cache.put("a", a)
-        cache.put("b", b)
-        cache.put("c", c)
-        # Lower the cap to fit only one entry; LRU "a" and "b" must drop.
-        cache.max_bytes = c.size_bytes
-        assert cache.max_bytes == c.size_bytes
-        assert "a" not in cache
-        assert "b" not in cache
-        assert "c" in cache
-        assert _invariant_holds(cache)
-
-    def test_set_max_bytes_negative_raises(self) -> None:
-        cache = ChannelCache(max_bytes=100)
-        with pytest.raises(ValueError, match="data_cache_max_bytes"):
-            cache.max_bytes = -1
-
-    def test_repeated_concat_updates_stay_under_bound(self) -> None:
-        """Simulates the customer's sliding-window pull: same channel, growing.
-
-        Without size reclamation on update, ``total_bytes`` would creep
-        above the cap silently. We re-build the entry each iteration to
-        mimic the ``_update_cache`` concat path.
-        """
-        cap = 1_000_000  # ~1 MB
-        cache = ChannelCache(max_bytes=cap)
-        accumulated = pd.DataFrame()
-        for i in range(50):
-            chunk = _frame(rows=1000, start=_NOW + timedelta(seconds=i), freq="us")
-            accumulated = pd.concat([accumulated, chunk])
-            cache.put(
-                "c1",
-                _new_cache_entry(
-                    data=accumulated,
-                    start_time=accumulated.index[0].to_pydatetime(),
-                    end_time=accumulated.index[-1].to_pydatetime(),
-                ),
-            )
-            assert cache.total_bytes <= cap, (
-                f"iteration {i}: total_bytes={cache.total_bytes} exceeded cap={cap}"
-            )
-            assert _invariant_holds(cache)
-
-
-class TestChannelCacheDisk:
-    """Disk-backed second-chance tier of :class:`ChannelCache`.
-
-    Three things must hold across these tests:
-
-    1. A fresh disk directory starts empty and accepts new writes.
-    2. Closing a populated cache and reopening at the same path surfaces
-       the previous entries on read (the "previous session" requirement).
-    3. The two tiers stay consistent across ``invalidate``/``clear`` and
-       ``disable_disk``, so the disk tier never becomes a stale shadow of
-       memory.
-
-    All tests confine writes to ``tmp_path`` so nothing leaks into the real
-    ``/tmp/sift-channel-data-cache``.
+    """Disk-backed :class:`ChannelCache` behaviour.
+
+    Five invariants must hold across these tests:
+
+    1. Constructing without a ``disk_path`` yields a no-op cache (every
+       operation is silent; ``__contains__`` returns ``False``).
+    2. A fresh disk directory starts empty and accepts new writes.
+    3. Closing a populated cache and reopening at the same path surfaces
+       the previous entries on read (the "previous session" requirement
+       that powers cold-start reuse).
+    4. Oversized entries are skipped with a deduped warning rather than
+       being inserted and triggering an eviction storm.
+    5. ``invalidate``/``clear`` reset the oversized-warning dedup state
+       so a future regression re-warns.
+
+    All tests confine writes to ``tmp_path`` so nothing leaks into the
+    real ``/tmp/sift-channel-data-cache``.
     """
 
-    def test_disabled_by_default(self) -> None:
-        """No ``disk_path`` → disk tier stays off and untouched."""
-        cache = ChannelCache(max_bytes=10_000_000)
+    def test_disabled_when_no_path(self) -> None:
+        """``ChannelCache()`` with no ``disk_path`` is a silent no-op."""
+        cache = ChannelCache()
         assert cache.disk_enabled is False
         assert cache.disk_path is None
         assert cache.disk_max_bytes is None
+        # Operations don't raise; the cache just stays empty.
+        cache.put("chan-1", _entry(rows=4))
+        assert "chan-1" not in cache
+        assert cache.get("chan-1") is None
+        cache.invalidate("chan-1")
+        cache.clear()
+        cache.close()
 
     def test_fresh_cache_writes_and_reads(self, tmp_path) -> None:
         """A fresh disk directory accepts writes and serves them back."""
         path = tmp_path / "fresh"
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        cache = ChannelCache(disk_path=path)
         try:
             assert cache.disk_enabled
             assert cache.disk_path == str(path)
             assert cache.disk_max_bytes == ChannelCache.DEFAULT_DISK_MAX_BYTES
             entry = _entry(rows=8)
             cache.put("chan-1", entry)
-            # Same instance: memory hit takes precedence; disk is just a copy.
             assert "chan-1" in cache
             got = cache.get("chan-1")
             assert got is not None
             pd.testing.assert_frame_equal(got.data, entry.data)
+            assert got.start_time == entry.start_time
+            assert got.end_time == entry.end_time
         finally:
             cache.close()
 
@@ -465,8 +258,8 @@ def test_reopen_existing_dir_sees_prior_session_entries(self, tmp_path) -> None:
         """Closing then reopening at the same path makes prior entries hit.
 
         This is the "look for existing caches from previous sessions"
-        guarantee: a new ``ChannelCache`` with an empty in-memory tier
-        finds entries on disk and promotes them into memory on first read.
+        guarantee: a new ``ChannelCache`` at a populated directory finds
+        entries on disk and returns them on the next read.
         """
         path = tmp_path / "prev-session"
         df = _frame("chan-1", rows=12, freq="s")
@@ -476,101 +269,122 @@ def test_reopen_existing_dir_sees_prior_session_entries(self, tmp_path) -> None:
             end_time=df.index[-1].to_pydatetime(),
         )
         # Session 1: populate and close.
-        session1 = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        session1 = ChannelCache(disk_path=path)
         session1.put("chan-1", original_entry)
         session1.close()
 
-        # Session 2: fresh process simulated by a brand-new ChannelCache.
-        # Memory starts empty, but ``__contains__`` reports the entry from
-        # disk and ``get`` returns it with bytes intact.
-        session2 = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        # Session 2: fresh process simulated by a brand-new ChannelCache
+        # at the same directory.
+        session2 = ChannelCache(disk_path=path)
         try:
-            assert len(session2) == 0  # in-memory tier starts cold
-            assert "chan-1" in session2  # disk-backed contains
+            assert "chan-1" in session2
             got = session2.get("chan-1")
             assert got is not None
             pd.testing.assert_frame_equal(got.data, original_entry.data)
             assert got.start_time == original_entry.start_time
             assert got.end_time == original_entry.end_time
-            # After the disk hit, the entry is now promoted into memory.
-            assert len(session2) == 1
         finally:
             session2.close()
 
-    def test_disk_hit_promotes_into_memory(self, tmp_path) -> None:
-        """A disk-only entry becomes a memory entry after one ``get``."""
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "promote")
+    def test_repeated_put_overwrites(self, tmp_path) -> None:
+        """A second ``put`` on the same key replaces the prior entry."""
+        cache = ChannelCache(disk_path=tmp_path / "overwrite")
         try:
-            cache.put("chan-1", _entry(rows=4))
-            # Drop from memory only (simulate eviction).
-            del cache._entries["chan-1"]
-            cache._total_bytes = 0
-            assert "chan-1" in cache  # still on disk
-            assert cache.get("chan-1") is not None
-            assert "chan-1" in cache._entries  # promoted back into memory
+            small = _entry(rows=10)
+            bigger = _entry(rows=100)
+            cache.put("chan", small)
+            cache.put("chan", bigger)
+            got = cache.get("chan")
+            assert got is not None
+            pd.testing.assert_frame_equal(got.data, bigger.data)
         finally:
             cache.close()
 
-    def test_disk_only_when_memory_disabled(self, tmp_path) -> None:
-        """``max_bytes=0`` (no memory) still routes writes/reads through disk.
-
-        Cold-storage configuration: caller wants persistence without
-        paying the in-memory footprint.
-        """
-        cache = ChannelCache(max_bytes=0, disk_path=tmp_path / "disk-only")
+    def test_invalidate_removes_entry(self, tmp_path) -> None:
+        """``invalidate`` drops the entry; safe to call when absent."""
+        cache = ChannelCache(disk_path=tmp_path / "inval")
         try:
-            assert not cache.enabled
-            assert cache.disk_enabled
+            cache.invalidate("never_added")  # safe before any puts
             cache.put("chan-1", _entry(rows=4))
-            assert "chan-1" not in cache._entries  # never landed in memory
-            got = cache.get("chan-1")
-            assert got is not None
-            assert "chan-1" not in cache._entries  # memory still bypassed
+            cache.invalidate("chan-1")
+            assert "chan-1" not in cache
+            assert cache.get("chan-1") is None
         finally:
             cache.close()
 
-    def test_invalidate_clears_both_tiers(self, tmp_path) -> None:
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "inval")
+    def test_clear_wipes_disk(self, tmp_path) -> None:
+        cache = ChannelCache(disk_path=tmp_path / "clear")
         try:
             cache.put("chan-1", _entry(rows=4))
-            cache.invalidate("chan-1")
-            assert "chan-1" not in cache._entries
-            assert "chan-1" not in cache  # contains() must check disk too
+            cache.put("chan-2", _entry(rows=4))
+            cache.clear()
+            assert "chan-1" not in cache
+            assert "chan-2" not in cache
         finally:
             cache.close()
 
-    def test_clear_wipes_both_tiers(self, tmp_path) -> None:
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "clear")
+    def test_disable_disk_closes_handle(self, tmp_path) -> None:
+        """Turning off disk closes the handle and silences subsequent ops."""
+        cache = ChannelCache(disk_path=tmp_path / "disable")
         try:
             cache.put("chan-1", _entry(rows=4))
+            cache.disable_disk()
+            assert not cache.disk_enabled
+            assert cache.disk_path is None
+            assert "chan-1" not in cache  # no handle → no hits
+            assert cache.get("chan-1") is None
+            # Subsequent puts are silently dropped.
             cache.put("chan-2", _entry(rows=4))
-            cache.clear()
-            assert len(cache) == 0
-            assert "chan-1" not in cache
             assert "chan-2" not in cache
         finally:
             cache.close()
 
-    def test_oversized_entry_skips_disk_preserves_other_entries(self, tmp_path) -> None:
-        """An entry larger than the disk cap is skipped on disk too.
+    def test_enable_disk_reconfigures_path(self, tmp_path) -> None:
+        """Reconfiguring to a different path closes the old handle.
+
+        The new directory starts empty: ``chan-1`` lived in the old
+        directory's diskcache, so the lookup at the new path misses.
+        """
+        cache = ChannelCache(disk_path=tmp_path / "a")
+        try:
+            cache.put("chan-1", _entry(rows=4))
+            cache.enable_disk(path=tmp_path / "b")
+            assert cache.disk_path == str(tmp_path / "b")
+            assert "chan-1" not in cache  # fresh directory
+        finally:
+            cache.close()
+
+    def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
+        """Re-enabling with identical settings doesn't churn the disk handle."""
+        cache = ChannelCache(disk_path=tmp_path / "noop")
+        try:
+            handle_before = cache._disk
+            cache.enable_disk(path=tmp_path / "noop", max_bytes=ChannelCache.DEFAULT_DISK_MAX_BYTES)
+            assert cache._disk is handle_before
+        finally:
+            cache.close()
+
+    def test_oversized_entry_skips_cache_preserves_neighbours(self, tmp_path) -> None:
+        """An entry larger than the cap is skipped without evicting peers.
 
-        Without the guard, ``diskcache``'s cull() would evict every other
-        on-disk row trying to fit an unfittable entry, then drop the entry
-        itself — the same wipe-everything failure mode as the memory tier.
+        Without this guard, ``diskcache``'s cull would evict every other
+        row trying to fit an unfittable entry, then drop the entry itself
+        — the wipe-everything failure mode the bounded-cache work
+        originally fixed. The disk-tier guard mirrors that fix.
 
         Memory is sized to accept small entries but reject the oversized one
-        too, so memory-tier writes don't compete with disk-tier writes. We
+        so memory-tier writes don't compete with disk-tier writes. We
         assert on the disk ``_disk`` mapping directly because that's where
         the contested behavior lives.
+
+        ``disk_max_bytes`` has to leave room for ``diskcache``'s pickle
+        envelope around each small entry (a few KB) AND be small enough
+        that the oversized entry trips the guard. Half the oversized
+        DataFrame's raw byte size hits both constraints comfortably.
         """
         small = _entry(rows=4)
         oversized = _entry(rows=10_000)
-        # ``disk_max_bytes`` has to leave room for ``diskcache``'s pickle
-        # envelope around each small entry (a few KB) AND be small enough
-        # that the oversized entry trips the guard. Half the oversized
-        # DataFrame's raw byte size hits both constraints comfortably.
         cache = ChannelCache(
-            max_bytes=oversized.size_bytes * 2,
             disk_path=tmp_path / "disk-oversize",
             disk_max_bytes=oversized.size_bytes // 2,
         )
@@ -580,54 +394,110 @@ def test_oversized_entry_skips_disk_preserves_other_entries(self, tmp_path) -> N
             assert cache._disk is not None
             with _capture_data_warnings() as records:
                 cache.put("huge", oversized)
-            # Disk-side prior entries survive; oversized one was not written.
-            assert "small-1" in cache._disk
-            assert "small-2" in cache._disk
-            assert "huge" not in cache._disk
+            # Prior entries survive; oversized one was not written.
+            assert "small-1" in cache
+            assert "small-2" in cache
+            assert "huge" not in cache
             assert any("larger than the disk cache cap" in r.getMessage() for r in records)
         finally:
             cache.close()
 
-    def test_disable_disk_preserves_memory(self, tmp_path) -> None:
-        """Turning off disk closes the handle but keeps memory intact."""
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "disable")
+    def test_oversized_put_drops_prior_entry(self, tmp_path) -> None:
+        """An oversized re-insert must drop the prior slice, not silently keep it.
+
+        Otherwise a stale subrange would masquerade as a hit on the next
+        ``get`` even though the caller's intent was to refresh the entry.
+        """
+        small = _entry(rows=4)
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(
+            disk_path=tmp_path / "drop-prior",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
         try:
-            cache.put("chan-1", _entry(rows=4))
-            cache.disable_disk()
-            assert not cache.disk_enabled
-            assert cache.disk_path is None
-            # Memory entry survives the disk-tier teardown.
-            assert "chan-1" in cache
-            assert cache.get("chan-1") is not None
+            cache.put("chan", small)
+            assert "chan" in cache
+            cache.put("chan", oversized)
+            assert "chan" not in cache
         finally:
             cache.close()
 
-    def test_enable_disk_reconfigures_path(self, tmp_path) -> None:
-        """Reconfiguring to a different path closes the old handle."""
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "a")
+    def test_oversized_put_warns_once_per_channel(self, tmp_path) -> None:
+        """Repeated oversized puts for the same channel log once, not on every call.
+
+        Without dedup, every ``get_data`` for an oversized channel would
+        write a fresh WARNING line — quickly drowning out other signal in
+        the logs.
+        """
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(
+            disk_path=tmp_path / "dedup",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
         try:
-            cache.put("chan-1", _entry(rows=4))
-            cache.enable_disk(path=tmp_path / "b")
-            assert cache.disk_path == str(tmp_path / "b")
-            # The new disk dir is fresh: nothing on disk yet under the new path.
-            # ``chan-1`` is still in memory, so __contains__ is still True.
-            assert "chan-1" in cache
-            # But the new disk dir is empty; drop from memory and the
-            # contains check now relies on disk, which won't find it.
-            del cache._entries["chan-1"]
-            cache._total_bytes = 0
-            assert "chan-1" not in cache
+            with _capture_data_warnings() as records:
+                for _ in range(5):
+                    cache.put("chan", oversized)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 1
         finally:
             cache.close()
 
-    def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
-        """Re-enabling with identical settings doesn't churn the disk handle."""
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=tmp_path / "noop")
+    def test_oversized_warning_resets_after_normal_put(self, tmp_path) -> None:
+        """A successful normal-sized put clears the dedup bit.
+
+        Used by callers who narrow a time window after seeing the warning:
+        the next oversized regression should re-warn rather than stay silent.
+        """
+        small = _entry(rows=4)
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(
+            disk_path=tmp_path / "reset-after-normal",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
         try:
-            handle_before = cache._disk
-            cache.enable_disk(path=tmp_path / "noop", max_bytes=ChannelCache.DEFAULT_DISK_MAX_BYTES)
-            # Same handle, no reopen.
-            assert cache._disk is handle_before
+            with _capture_data_warnings() as records:
+                cache.put("chan", oversized)  # 1st warning
+                cache.put("chan", small)  # resets state
+                cache.put("chan", oversized)  # 2nd warning
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 2
+        finally:
+            cache.close()
+
+    def test_invalidate_resets_oversized_warning(self, tmp_path) -> None:
+        """``invalidate`` is a fresh start; the next oversized put re-warns."""
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(
+            disk_path=tmp_path / "reset-invalidate",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
+        try:
+            with _capture_data_warnings() as records:
+                cache.put("chan", oversized)
+                cache.invalidate("chan")
+                cache.put("chan", oversized)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 2
+        finally:
+            cache.close()
+
+    def test_clear_resets_oversized_warning(self, tmp_path) -> None:
+        """``clear`` resets dedup state across channels."""
+        oversized = _entry(rows=10_000)
+        cache = ChannelCache(
+            disk_path=tmp_path / "reset-clear",
+            disk_max_bytes=oversized.size_bytes // 2,
+        )
+        try:
+            with _capture_data_warnings() as records:
+                cache.put("chan-a", oversized)
+                cache.put("chan-b", oversized)
+                cache.clear()
+                cache.put("chan-a", oversized)
+                cache.put("chan-b", oversized)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 4
         finally:
             cache.close()
 
@@ -642,7 +512,7 @@ class TestChannelCacheClearDisk:
 
     def test_clear_removes_directory(self, tmp_path) -> None:
         path = tmp_path / "victim"
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        cache = ChannelCache(disk_path=path)
         cache.put("chan-1", _entry(rows=4))
         cache.close()
         assert path.exists()
@@ -659,7 +529,6 @@ def test_clear_refuses_non_diskcache_directory(self, tmp_path) -> None:
         (target / "important.txt").write_text("don't delete me")
         with pytest.raises(ValueError, match="does not look like a sift channel data cache"):
             ChannelCache.clear_disk(target)
-        # Unrelated contents preserved.
         assert (target / "important.txt").read_text() == "don't delete me"
 
     def test_default_path_constant_under_tmp(self) -> None:
@@ -779,31 +648,49 @@ class TestDataLowLevelClient:
     :class:`TestGetChannelData`.
     """
 
-    def test_per_instance_isolation(self) -> None:
-        """Two clients must not share cache state.
+    def test_no_cache_when_disk_path_omitted(self) -> None:
+        """Default construction leaves the cache in no-op mode.
 
-        Regression test for the original OOM bug: ``channel_cache`` was a
-        class attribute, so every ``SiftClient`` in the process appended to
-        the same dict. Two fresh clients must have independent caches.
+        The ``ChannelsAPIAsync`` resource is the public surface for
+        opting into disk persistence; the bare ``DataLowLevelClient``
+        keeps caching off so unit tests don't accidentally write to
+        ``/tmp`` just by instantiating the wrapper.
         """
-        client_a = DataLowLevelClient(MagicMock())
-        client_b = DataLowLevelClient(MagicMock())
-        client_a.channel_cache.put("c1", _entry(rows=10))
-        assert "c1" in client_a.channel_cache
-        assert "c1" not in client_b.channel_cache
-        assert client_b.channel_cache.total_bytes == 0
-
-    def test_data_cache_max_bytes_kwarg_propagates(self) -> None:
-        """``data_cache_max_bytes`` is forwarded to the underlying cache.
-
-        The disabled-cache *behaviour* itself is covered by
-        :meth:`TestChannelCache.test_max_bytes_zero_disables_cache`; this
-        test just verifies the constructor passes the kwarg through.
+        client = DataLowLevelClient(MagicMock())
+        assert not client.channel_cache.disk_enabled
+
+    def test_per_instance_isolation(self, tmp_path) -> None:
+        """Two clients with separate disk paths must not share cache state.
+
+        Regression test for the original OOM bug: ``channel_cache`` was a
+        class attribute, so every ``SiftClient`` in the process appended
+        to the same dict. Two fresh clients with distinct directories must
+        have independent caches.
         """
-        assert DataLowLevelClient(MagicMock(), data_cache_max_bytes=0).channel_cache.max_bytes == 0
-        assert (
-            DataLowLevelClient(MagicMock(), data_cache_max_bytes=42).channel_cache.max_bytes == 42
+        client_a = _client_with_cache(tmp_path, "a")
+        client_b = _client_with_cache(tmp_path, "b")
+        try:
+            client_a.channel_cache.put("c1", _entry(rows=10))
+            assert "c1" in client_a.channel_cache
+            assert "c1" not in client_b.channel_cache
+        finally:
+            client_a.channel_cache.close()
+            client_b.channel_cache.close()
+
+    def test_disk_cache_kwargs_propagate(self, tmp_path) -> None:
+        """Constructor kwargs land on the underlying ``ChannelCache``."""
+        path = tmp_path / "kwargs"
+        client = DataLowLevelClient(
+            MagicMock(),
+            disk_cache_path=path,
+            disk_cache_max_bytes=8_192,
         )
+        try:
+            assert client.channel_cache.disk_enabled
+            assert client.channel_cache.disk_path == str(path)
+            assert client.channel_cache.disk_max_bytes == 8_192
+        finally:
+            client.channel_cache.close()
 
 
 class TestGetChannelData:
@@ -850,81 +737,89 @@ async def test_multi_page_response_concatenated_per_channel(self) -> None:
         pd.testing.assert_frame_equal(result["c1"].sort_index(), expected.sort_index())
 
     @pytest.mark.asyncio
-    async def test_cache_hit_short_circuits_grpc(self) -> None:
+    async def test_cache_hit_short_circuits_grpc(self, tmp_path) -> None:
         """Second request for the same channel + window skips ``_get_data_impl``.
 
         Stages two pages-worth of data so a faulty cache that falls through
         wouldn't silently pass by hitting EOF — any second-call invocation
         would consume the second page and bump ``len(call_log)``.
         """
-        client = DataLowLevelClient(MagicMock())
+        client = _client_with_cache(tmp_path)
         df = _frame("c1")
-        with _fake_grpc(client, {"c1": [df, df]}) as call_log:
-            first = await client.get_channel_data(
-                channels=[_channel("c1")],
-                start_time=_NOW,
-                end_time=_WINDOW_END,
-            )
-            calls_after_first = len(call_log)
-            assert calls_after_first >= 1
-
-            second = await client.get_channel_data(
-                channels=[_channel("c1")],
-                start_time=_NOW,
-                end_time=_WINDOW_END,
-            )
-            assert len(call_log) == calls_after_first, (
-                "second call should be served from cache without invoking _get_data_impl"
-            )
-        pd.testing.assert_frame_equal(first["c1"].sort_index(), second["c1"].sort_index())
+        try:
+            with _fake_grpc(client, {"c1": [df, df]}) as call_log:
+                first = await client.get_channel_data(
+                    channels=[_channel("c1")],
+                    start_time=_NOW,
+                    end_time=_WINDOW_END,
+                )
+                calls_after_first = len(call_log)
+                assert calls_after_first >= 1
+
+                second = await client.get_channel_data(
+                    channels=[_channel("c1")],
+                    start_time=_NOW,
+                    end_time=_WINDOW_END,
+                )
+                assert len(call_log) == calls_after_first, (
+                    "second call should be served from cache without invoking _get_data_impl"
+                )
+            pd.testing.assert_frame_equal(first["c1"].sort_index(), second["c1"].sort_index())
+        finally:
+            client.channel_cache.close()
 
     @pytest.mark.asyncio
-    async def test_partial_cache_hit_merges_cached_and_fresh(self) -> None:
+    async def test_partial_cache_hit_merges_cached_and_fresh(self, tmp_path) -> None:
         """Cached + uncached channels resolved together in one return dict.
 
         Only the uncached channel triggers ``_get_data_impl``.
         """
-        client = DataLowLevelClient(MagicMock())
+        client = _client_with_cache(tmp_path)
         c1_df, c2_df = _frame("c1"), _frame("c2", offset=100)
-        with _fake_grpc(client, {"c1": [c1_df], "c2": [c2_df]}) as call_log:
-            await client.get_channel_data(
-                channels=[_channel("c1")],
-                start_time=_NOW,
-                end_time=_WINDOW_END,
-            )
-            calls_after_warmup = len(call_log)
-
-            result = await client.get_channel_data(
-                channels=[_channel("c1"), _channel("c2")],
-                start_time=_NOW,
-                end_time=_WINDOW_END,
-            )
-            new_calls = call_log[calls_after_warmup:]
-
-        assert new_calls, "c2 should hit the wire on the second call"
-        for call in new_calls:
-            assert call["channel_ids"] == ["c2"], f"only c2 should hit the wire, saw {call!r}"
-        assert set(result.keys()) == {"c1", "c2"}
-        pd.testing.assert_frame_equal(result["c1"].sort_index(), c1_df.sort_index())
-        pd.testing.assert_frame_equal(result["c2"].sort_index(), c2_df.sort_index())
+        try:
+            with _fake_grpc(client, {"c1": [c1_df], "c2": [c2_df]}) as call_log:
+                await client.get_channel_data(
+                    channels=[_channel("c1")],
+                    start_time=_NOW,
+                    end_time=_WINDOW_END,
+                )
+                calls_after_warmup = len(call_log)
+
+                result = await client.get_channel_data(
+                    channels=[_channel("c1"), _channel("c2")],
+                    start_time=_NOW,
+                    end_time=_WINDOW_END,
+                )
+                new_calls = call_log[calls_after_warmup:]
+
+            assert new_calls, "c2 should hit the wire on the second call"
+            for call in new_calls:
+                assert call["channel_ids"] == ["c2"], f"only c2 should hit the wire, saw {call!r}"
+            assert set(result.keys()) == {"c1", "c2"}
+            pd.testing.assert_frame_equal(result["c1"].sort_index(), c1_df.sort_index())
+            pd.testing.assert_frame_equal(result["c2"].sort_index(), c2_df.sort_index())
+        finally:
+            client.channel_cache.close()
 
     @pytest.mark.asyncio
-    async def test_ignore_cache_true_returns_fresh_and_skips_write(self) -> None:
+    async def test_ignore_cache_true_returns_fresh_and_skips_write(self, tmp_path) -> None:
         """``ignore_cache=True`` returns mock data and leaves the cache empty.
 
         End-to-end version of the latent bug that compounded the customer's
         OOM: pre-fix, ``_update_cache`` ran even when the caller had asked
         the cache to be ignored.
         """
-        client = DataLowLevelClient(MagicMock())
+        client = _client_with_cache(tmp_path)
         df = _frame("c1")
-        with _fake_grpc(client, {"c1": [df]}):
-            result = await client.get_channel_data(
-                channels=[_channel("c1")],
-                start_time=_NOW,
-                end_time=_WINDOW_END,
-                ignore_cache=True,
-            )
-        pd.testing.assert_frame_equal(result["c1"], df)
-        assert "c1" not in client.channel_cache
-        assert client.channel_cache.total_bytes == 0
+        try:
+            with _fake_grpc(client, {"c1": [df]}):
+                result = await client.get_channel_data(
+                    channels=[_channel("c1")],
+                    start_time=_NOW,
+                    end_time=_WINDOW_END,
+                    ignore_cache=True,
+                )
+            pd.testing.assert_frame_equal(result["c1"], df)
+            assert "c1" not in client.channel_cache
+        finally:
+            client.channel_cache.close()
diff --git a/python/lib/sift_client/_tests/resources/test_channels.py b/python/lib/sift_client/_tests/resources/test_channels.py
index 3ed3826b1..ceee9ddef 100644
--- a/python/lib/sift_client/_tests/resources/test_channels.py
+++ b/python/lib/sift_client/_tests/resources/test_channels.py
@@ -503,69 +503,18 @@ async def fake_update_channel(update):
         assert captured["update"].unit == ""
 
 
-class TestConfigureDataCache:
-    """``configure_data_cache`` is the resource-level knob for the in-memory
-    channel data cache. Before the cache is initialized, it stashes the value
-    for the lazy-init path; after, it retunes the live cache.
-
-    Each test that triggers ``_ensure_data_low_level_client`` opens the
-    opt-out disk tier (redirected to ``tmp_path`` by the conftest fixture)
-    and closes the handle in ``finally`` so the diskcache lock doesn't leak
-    into the next test.
-    """
-
-    def test_before_lazy_init_propagates_to_cache(self):
-        """Configuring before the first ``get_data`` lands on the cache at init."""
-        api = _make_api()
-        api.configure_data_cache(max_bytes=123)
-        assert api._data_low_level_client is None  # still lazy
-        api._ensure_data_low_level_client()
-        try:
-            assert api._data_low_level_client.channel_cache.max_bytes == 123
-        finally:
-            api._data_low_level_client.channel_cache.close()
-
-    def test_after_lazy_init_updates_live_cache(self):
-        """Configuring after first use retunes the live cache in place."""
-        api = _make_api()
-        api._ensure_data_low_level_client()
-        try:
-            original_client = api._data_low_level_client
-            api.configure_data_cache(max_bytes=456)
-            # Same wrapper instance — we mutated, not replaced.
-            assert api._data_low_level_client is original_client
-            assert api._data_low_level_client.channel_cache.max_bytes == 456
-        finally:
-            api._data_low_level_client.channel_cache.close()
-
-    def test_zero_disables_cache_via_resource(self):
-        """Resource-level ``max_bytes=0`` end-to-end disables the cache."""
-        api = _make_api()
-        api.configure_data_cache(max_bytes=0)
-        api._ensure_data_low_level_client()
-        try:
-            assert not api._data_low_level_client.channel_cache.enabled
-        finally:
-            api._data_low_level_client.channel_cache.close()
-
-    def test_negative_raises(self):
-        api = _make_api()
-        with pytest.raises(ValueError, match="max_bytes"):
-            api.configure_data_cache(max_bytes=-1)
-
-
 class TestEnableDataCacheDisk:
     """``enable_data_cache_disk`` / ``disable_data_cache_disk`` plumb the disk
-    tier setting to the underlying ``ChannelCache``, both pre- and post-init.
+    cache settings to the underlying ``ChannelCache``, both pre- and post-init.
 
-    The disk tier itself is exercised directly in
-    ``test_data.py::TestChannelCacheDisk``; the tests here just verify the
+    The cache itself is exercised directly in
+    ``test_data.py::TestChannelCache``; the tests here just verify the
     resource-level wiring around it.
     """
 
     def test_enabled_by_default(self):
-        """Disk persistence is opt-out: the default-constructed resource
-        lands at ``ChannelCache.DEFAULT_DISK_PATH`` on first ``get_data``.
+        """Disk caching is opt-out: the default-constructed resource lands
+        at ``ChannelCache.DEFAULT_DISK_PATH`` on first ``get_data``.
 
         The autouse ``_isolate_default_disk_cache_path`` fixture in
         ``conftest.py`` redirects the constant to a per-test tmp dir so this
@@ -652,7 +601,7 @@ def test_clear_data_cache_on_disk_proxies_to_cache(self, tmp_path):
 
         path = tmp_path / "to-clear"
         # Populate a real disk-cache directory so the marker check passes.
-        cache = ChannelCache(max_bytes=10_000_000, disk_path=path)
+        cache = ChannelCache(disk_path=path)
         cache.close()
         assert path.exists()
 
@@ -660,9 +609,9 @@ def test_clear_data_cache_on_disk_proxies_to_cache(self, tmp_path):
         api.clear_data_cache_on_disk(path)
         assert not path.exists()
 
-    def test_default_path_failure_falls_back_to_memory(self, monkeypatch, tmp_path):
-        """If the opt-out default disk path can't be opened, the wrapper logs
-        a warning and continues with the in-memory cache only.
+    def test_default_path_failure_falls_back_to_no_cache(self, monkeypatch, tmp_path):
+        """If the opt-out default cache path can't be opened, the wrapper
+        logs a warning and continues with caching disabled.
 
         Simulated by pointing ``DEFAULT_DISK_PATH`` at a path that already
         exists as a regular file — ``os.makedirs(..., exist_ok=True)`` raises
@@ -678,9 +627,8 @@ def test_default_path_failure_falls_back_to_memory(self, monkeypatch, tmp_path):
         api._ensure_data_low_level_client()  # must not raise
         cache = api._data_low_level_client.channel_cache
         try:
-            # Disk silently dropped, memory still working.
+            # Cache silently dropped; ``get_data`` will go straight to the wire.
             assert not cache.disk_enabled
-            assert cache.enabled
         finally:
             cache.close()
 
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index 794930fda..6be88f84e 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -69,34 +69,8 @@ def __init__(self, sift_client: SiftClient):
         self._low_level_client = ChannelsLowLevelClient(grpc_client=self.client.grpc_client)
         self._units_low_level_client = UnitsLowLevelClient(grpc_client=self.client.grpc_client)
         self._data_low_level_client = None
-        # Caller-supplied cache size; ``None`` means "use the wrapper default
-        # at lazy-init time" so we don't have to import ``data.py`` (and
-        # therefore pandas) just to remember the default.
-        self._data_cache_max_bytes: int | None = None
         self._disk_cache_config = DiskCacheConfig(enabled=True)
 
-    def configure_data_cache(self, *, max_bytes: int) -> None:
-        """Configure the in-memory channel data cache used by ``get_data``.
-
-        Args:
-            max_bytes: Byte cap on the cache. ``0`` disables caching
-                (every ``get_data`` call goes to the wire). Defaults to
-                512 MiB until explicitly configured. Must be ``>= 0``.
-
-        Safe to call before or after the first ``get_data``. If the cache is
-        already live, the new cap is applied immediately and least-recently-
-        used entries are evicted until ``total_bytes`` fits.
-
-        Example:
-            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
-            client.channels.configure_data_cache(max_bytes=0)  # disable
-        """
-        if max_bytes < 0:
-            raise ValueError(f"max_bytes must be >= 0, got {max_bytes}")
-        self._data_cache_max_bytes = max_bytes
-        if self._data_low_level_client is not None:
-            self._data_low_level_client.channel_cache.max_bytes = max_bytes
-
     def enable_data_cache_disk(
         self,
         *,
@@ -107,28 +81,28 @@ def enable_data_cache_disk(
 
         Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
         use this method when you want to override the path or size, or to turn
-        the tier back on after a prior ``disable_data_cache_disk`` call.
+        the cache back on after a prior ``disable_data_cache_disk`` call.
 
-        The disk-backed tier is a second-chance layer beneath the in-memory
-        cache: on a memory miss, ``get_data`` checks disk before going to the
-        wire. The default path lives under ``tempfile.gettempdir()`` and is
-        shared across sessions, so a re-run of the same workload picks up
-        previously-cached windows without a fetch.
+        Each entry that ``get_data`` returns is written to the cache and read
+        back on subsequent calls, even after process restart. The default
+        path lives under ``tempfile.gettempdir()`` and is shared across
+        sessions, so a re-run of the same workload picks up previously-cached
+        windows without a fetch.
 
         Safe to call before or after the first ``get_data``. Reconfiguring
-        (different ``path`` or ``max_bytes``) closes the previous disk handle
-        and opens a new one; in-memory contents are preserved across the swap.
+        (different ``path`` or ``max_bytes``) closes the previous handle and
+        opens a new one.
 
         An explicit ``path`` that can't be opened (e.g. permission denied,
         read-only filesystem) raises so the caller knows the request didn't
         take. The default-path open does *not* raise — see
-        ``_ensure_data_low_level_client`` for the fall-back-to-memory path.
+        ``_ensure_data_low_level_client`` for the silent fall-back behaviour.
 
         Args:
             path: Directory to persist the cache to. ``None`` (the default)
                 uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
                 the path become available as cache hits.
-            max_bytes: Byte cap on the disk tier. ``None`` uses
+            max_bytes: Byte cap on disk usage. ``None`` uses
                 ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
                 bound is reached, ``diskcache``'s LRU eviction takes over.
 
@@ -141,13 +115,12 @@ def enable_data_cache_disk(
             self._data_low_level_client.channel_cache.enable_disk(path=path, max_bytes=max_bytes)
 
     def disable_data_cache_disk(self) -> None:
-        """Opt out of disk persistence for the channel data cache.
+        """Opt out of caching for ``get_data`` (no reads or writes).
 
-        Disk persistence is on by default; call this when you don't want any
-        cached data written to disk. Closes any open disk-cache file handle.
-        The on-disk directory is NOT deleted — use
-        :meth:`clear_data_cache_on_disk` to wipe it. In-memory entries are
-        preserved.
+        Caching is on by default; call this when you don't want any cached
+        data written to or read from disk. Closes any open cache file
+        handle. The on-disk directory is NOT deleted — use
+        :meth:`clear_data_cache_on_disk` to wipe it.
         """
         self._disk_cache_config.disable()
         if self._data_low_level_client is not None:
@@ -362,16 +335,12 @@ def _ensure_data_low_level_client(self):
                 DataLowLevelClient,
             )
 
-            # Pass each kwarg only when explicitly configured so the wrapper's
-            # own defaults remain the single source of truth.
             kwargs: dict = {}
-            if self._data_cache_max_bytes is not None:
-                kwargs["data_cache_max_bytes"] = self._data_cache_max_bytes
             disk_config = self._disk_cache_config
             if disk_config.enabled:
-                # ``disk_path=None`` means "disabled" to ChannelCache; substitute
+                # ``disk_path=None`` means "no cache" to ChannelCache; substitute
                 # the default explicitly so the opt-out default still opens
-                # the disk tier. ``DEFAULT_DISK_PATH`` is read here (not at
+                # the cache. ``DEFAULT_DISK_PATH`` is read here (not at
                 # config construction) so test fixtures that monkeypatch the
                 # class attribute see the override.
                 kwargs["disk_cache_path"] = disk_config.path or ChannelCache.DEFAULT_DISK_PATH
@@ -383,26 +352,23 @@ def _ensure_data_low_level_client(self):
                     **kwargs,
                 )
             except Exception:
-                # Explicit user-supplied disk path failures propagate so the
+                # Explicit user-supplied path failures propagate so the
                 # caller knows their request didn't take. Default-path failures
                 # (read-only ``/tmp``, restricted containers, etc.) degrade
-                # silently to memory-only so ``get_data`` still works.
+                # silently to no-cache mode so ``get_data`` still works.
                 if not disk_config.using_default_path:
                     raise
                 logger.warning(
-                    "Could not open the default channel data disk cache at %r; "
-                    "falling back to in-memory cache only. Call "
+                    "Could not open the default channel data cache at %r; "
+                    "falling back to no caching for ``get_data``. Call "
                     "``client.channels.disable_data_cache_disk()`` to silence "
                     "this warning, or pass an explicit path via "
                     "``enable_data_cache_disk(path=...)``.",
                     kwargs.get("disk_cache_path"),
                     exc_info=True,
                 )
-                kwargs.pop("disk_cache_path", None)
-                kwargs.pop("disk_cache_max_bytes", None)
                 self._data_low_level_client = DataLowLevelClient(
                     grpc_client=self.client.grpc_client,
-                    **kwargs,
                 )
 
     async def get_data(
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index 8e76a56ff..cc3ec914f 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -478,32 +478,13 @@ class ChannelsAPI:
         """
         ...
 
-    def configure_data_cache(self, *, max_bytes: int) -> None:
-        """Configure the in-memory channel data cache used by ``get_data``.
-
-        Args:
-            max_bytes: Byte cap on the cache. ``0`` disables caching
-                (every ``get_data`` call goes to the wire). Defaults to
-                512 MiB until explicitly configured. Must be ``>= 0``.
-
-        Safe to call before or after the first ``get_data``. If the cache is
-        already live, the new cap is applied immediately and least-recently-
-        used entries are evicted until ``total_bytes`` fits.
-
-        Example:
-            client.channels.configure_data_cache(max_bytes=128 * 1024 * 1024)
-            client.channels.configure_data_cache(max_bytes=0)  # disable
-        """
-        ...
-
     def disable_data_cache_disk(self) -> None:
-        """Opt out of disk persistence for the channel data cache.
+        """Opt out of caching for ``get_data`` (no reads or writes).
 
-        Disk persistence is on by default; call this when you don't want any
-        cached data written to disk. Closes any open disk-cache file handle.
-        The on-disk directory is NOT deleted — use
-        :meth:`clear_data_cache_on_disk` to wipe it. In-memory entries are
-        preserved.
+        Caching is on by default; call this when you don't want any cached
+        data written to or read from disk. Closes any open cache file
+        handle. The on-disk directory is NOT deleted — use
+        :meth:`clear_data_cache_on_disk` to wipe it.
         """
         ...
 
@@ -514,28 +495,28 @@ class ChannelsAPI:
 
         Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
         use this method when you want to override the path or size, or to turn
-        the tier back on after a prior ``disable_data_cache_disk`` call.
+        the cache back on after a prior ``disable_data_cache_disk`` call.
 
-        The disk-backed tier is a second-chance layer beneath the in-memory
-        cache: on a memory miss, ``get_data`` checks disk before going to the
-        wire. The default path lives under ``tempfile.gettempdir()`` and is
-        shared across sessions, so a re-run of the same workload picks up
-        previously-cached windows without a fetch.
+        Each entry that ``get_data`` returns is written to the cache and read
+        back on subsequent calls, even after process restart. The default
+        path lives under ``tempfile.gettempdir()`` and is shared across
+        sessions, so a re-run of the same workload picks up previously-cached
+        windows without a fetch.
 
         Safe to call before or after the first ``get_data``. Reconfiguring
-        (different ``path`` or ``max_bytes``) closes the previous disk handle
-        and opens a new one; in-memory contents are preserved across the swap.
+        (different ``path`` or ``max_bytes``) closes the previous handle and
+        opens a new one.
 
         An explicit ``path`` that can't be opened (e.g. permission denied,
         read-only filesystem) raises so the caller knows the request didn't
         take. The default-path open does *not* raise — see
-        ``_ensure_data_low_level_client`` for the fall-back-to-memory path.
+        ``_ensure_data_low_level_client`` for the silent fall-back behaviour.
 
         Args:
             path: Directory to persist the cache to. ``None`` (the default)
                 uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
                 the path become available as cache hits.
-            max_bytes: Byte cap on the disk tier. ``None`` uses
+            max_bytes: Byte cap on disk usage. ``None`` uses
                 ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
                 bound is reached, ``diskcache``'s LRU eviction takes over.
 

From babb927b696fd8c022ced4f3e588f4256d5bf8f5 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 20:42:28 -0700
Subject: [PATCH 13/14] Split channel cache into shared DiskCache +
 ChannelDataCache adapter.

Move the diskcache mechanics into one client-owned store (DiskCache) so
every cache-aware resource shares a single byte budget and LRU. Channels
wrap the store in a ChannelDataCache adapter that namespaces keys as
"channel:<id>". Cache knobs (enable_disk / disable_disk / clear_disk)
move from the channels resource to a client.cache namespace.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 python/CHANGELOG.md                           |  16 +-
 .../sift_client/_internal/cache_namespace.py  | 114 ++++
 .../lib/sift_client/_internal/disk_cache.py   | 336 +++++++++++
 .../_internal/low_level_wrappers/data.py      | 349 +++---------
 .../_internal/low_level_wrappers/test_data.py | 523 +++++-------------
 .../_tests/_internal/test_disk_cache.py       | 343 ++++++++++++
 python/lib/sift_client/_tests/conftest.py     |  33 +-
 .../_tests/resources/test_channels.py         | 142 -----
 .../sift_client/_tests/test_client_cache.py   | 261 +++++++++
 python/lib/sift_client/client.py              |  67 +++
 python/lib/sift_client/resources/channels.py  | 134 +----
 .../resources/sync_stubs/__init__.pyi         |  74 ---
 12 files changed, 1379 insertions(+), 1013 deletions(-)
 create mode 100644 python/lib/sift_client/_internal/cache_namespace.py
 create mode 100644 python/lib/sift_client/_internal/disk_cache.py
 create mode 100644 python/lib/sift_client/_tests/_internal/test_disk_cache.py
 create mode 100644 python/lib/sift_client/_tests/test_client_cache.py

diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md
index d3ffaeb65..4b9205bbd 100644
--- a/python/CHANGELOG.md
+++ b/python/CHANGELOG.md
@@ -11,29 +11,29 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 Up to a ~80x speedup for some get_data calls.
 
-#### Channel data cache (opt-out, on by default)
+#### Shared on-disk cache (opt-out, on by default)
 
 `client.channels.get_data(...)` now caches the channel windows it returns to disk by default. Subsequent calls covering the same channel/time range — including from a fresh process — read straight out of the cache instead of going to the wire. This also bounds memory: nothing is held in process after the call returns, which fixes the OOM seen on long sustained pulls (~5–7 GB of cache for a 145M-point pull in earlier versions).
 
-The default location is `<tempfile.gettempdir()>/sift-channel-data-cache`, capped at 4 GiB with LRU eviction. If the default path can't be opened (read-only filesystem, restricted container, etc.), the client logs a warning and continues with caching disabled — `get_data` still works, it just always goes to the wire.
+The cache lives on the `SiftClient` as a single shared store: every cache-aware resource writes to one global byte budget at one path, with one LRU policy. The default location is `<tempfile.gettempdir()>/sift-data-cache`, capped at 4 GiB with LRU eviction. If the default path can't be opened (read-only filesystem, restricted container, etc.), the client logs a warning and continues with caching disabled — `get_data` still works, it just always goes to the wire.
 
 `ignore_cache=True` on `client.channels.get_data(...)` now skips writing into the cache as well as reading from it. Previously a "non-caching" workload still appended to the shared cache on every call.
 
-Opt out, reconfigure, or wipe the cache from the `channels` resource:
+Configuration lives on the new `client.cache` namespace — knobs are global because the store is shared:
 
 ```python
 # Opt out — no data persisted to disk; every get_data call goes to the wire.
-client.channels.disable_data_cache_disk()
+client.cache.disable_disk()
 
 # Reconfigure the location or byte cap.
-client.channels.enable_data_cache_disk(path="/data/sift-cache", max_bytes=2 * 1024 ** 3)
+client.cache.enable_disk(path="/data/sift-cache", max_bytes=2 * 1024 ** 3)
 
 # Remove a stale or corrupted cache directory.
-client.channels.clear_data_cache_on_disk()                   # default tmp path
-client.channels.clear_data_cache_on_disk("/data/sift-cache") # custom path
+client.cache.clear_disk()                   # default tmp path
+client.cache.clear_disk("/data/sift-cache") # custom path
 ```
 
-`enable_data_cache_disk` is also the way to turn the cache back on after a prior `disable_data_cache_disk` call.
+`enable_disk` is also the way to turn the cache back on after a prior `disable_disk` call.
 
 The cache is powered by [`diskcache`](https://grantjenks.com/docs/diskcache/) (pure-Python, SQLite-backed) with LRU eviction.
 
diff --git a/python/lib/sift_client/_internal/cache_namespace.py b/python/lib/sift_client/_internal/cache_namespace.py
new file mode 100644
index 000000000..c76ccaeb9
--- /dev/null
+++ b/python/lib/sift_client/_internal/cache_namespace.py
@@ -0,0 +1,114 @@
+"""User-facing surface for the shared on-disk cache.
+
+This module hosts the small bag of methods exposed as ``client.cache``.
+The cache itself (a :class:`~sift_client._internal.disk_cache.DiskCache`)
+lives on :class:`~sift_client.client.SiftClient` so every resource that
+wants to persist results across calls can reach into one shared store.
+
+The namespace deliberately mirrors :class:`DiskCache` rather than the
+old per-resource API (``client.channels.enable_data_cache_disk(...)``):
+since the store is shared, configuration is global.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from sift_client._internal.disk_cache import DiskCache
+
+if TYPE_CHECKING:
+    import os
+
+    from sift_client.client import SiftClient
+
+logger = logging.getLogger(__name__)
+
+
+class CacheNamespace:
+    """Resource-agnostic surface for the on-disk cache shared by all resources.
+
+    Exposed as ``client.cache``. The actual handle (:class:`DiskCache`) is
+    constructed lazily on first use so importing :mod:`sift_client` doesn't
+    pay the diskcache cost up front. Configuration changes made before
+    that first use are recorded against the
+    :class:`~sift_client._internal.disk_cache_config.DiskCacheConfig` on the
+    client and applied when the store opens; changes after first use are
+    routed directly to the live :class:`DiskCache`.
+
+    Default policy: disk caching is **opt-out** (the ``DiskCacheConfig`` is
+    constructed with ``enabled=True``). Users who don't want any state on
+    disk call :meth:`disable_disk` to silence it; users who want a custom
+    location or byte cap call :meth:`enable_disk` with arguments.
+    """
+
+    def __init__(self, client: SiftClient):
+        self._client = client
+
+    def enable_disk(
+        self,
+        *,
+        path: str | os.PathLike[str] | None = None,
+        max_bytes: int | None = None,
+    ) -> None:
+        """Enable (or reconfigure) on-disk caching.
+
+        Disk caching is **on by default** at :attr:`DiskCache.DEFAULT_DISK_PATH`;
+        use this method to override the path or size, or to turn the cache
+        back on after a prior :meth:`disable_disk` call.
+
+        Reconfiguring a live cache (different ``path`` or ``max_bytes``)
+        closes the previous handle and opens a new one. Existing entries
+        at the new path become available as cache hits.
+
+        An explicit ``path`` that can't be opened (permission denied,
+        read-only filesystem, ...) raises so the caller knows their
+        request didn't take. The default-path open does *not* raise — see
+        :meth:`SiftClient._get_disk_cache` for the silent fall-back.
+
+        Args:
+            path: Directory to persist to. ``None`` (the default) uses
+                :attr:`DiskCache.DEFAULT_DISK_PATH`.
+            max_bytes: Byte cap on disk usage. ``None`` uses
+                :attr:`DiskCache.DEFAULT_DISK_MAX_BYTES` (4 GiB). When the
+                bound is reached, ``diskcache``'s LRU eviction takes over.
+
+        Example:
+            client.cache.enable_disk(path="/data/sift-cache")
+            client.cache.enable_disk(max_bytes=1024 ** 3)  # 1 GiB
+        """
+        client = self._client
+        client._disk_cache_config.enable(path=path, max_bytes=max_bytes)
+        if client._disk_cache is not None:
+            client._disk_cache.enable_disk(path=path, max_bytes=max_bytes)
+
+    def disable_disk(self) -> None:
+        """Opt out of on-disk caching (no reads or writes).
+
+        Caching is on by default; call this when you don't want any
+        cached data written to or read from disk. Closes any open cache
+        file handle. The on-disk directory is NOT deleted — use
+        :meth:`clear_disk` to wipe it.
+        """
+        client = self._client
+        client._disk_cache_config.disable()
+        if client._disk_cache is not None:
+            client._disk_cache.disable_disk()
+
+    def clear_disk(self, path: str | os.PathLike[str] | None = None) -> None:
+        """Delete a previously-persisted on-disk cache directory.
+
+        Drops stale caches from previous sessions, recovers from a
+        corrupt cache, or reclaims disk space. Removes the directory
+        entirely; if disk caching is on, the next access re-opens an
+        empty cache at the same path.
+
+        Args:
+            path: Directory of the cache to clear. ``None`` (the default)
+                targets :attr:`DiskCache.DEFAULT_DISK_PATH`.
+
+        Raises:
+            ValueError: If ``path`` exists but does not look like a sift
+                data cache directory.
+        """
+        DiskCache.clear_disk(path)
diff --git a/python/lib/sift_client/_internal/disk_cache.py b/python/lib/sift_client/_internal/disk_cache.py
new file mode 100644
index 000000000..0118c6477
--- /dev/null
+++ b/python/lib/sift_client/_internal/disk_cache.py
@@ -0,0 +1,336 @@
+"""Shared on-disk key/value store used by every resource that wants to cache results.
+
+One :class:`DiskCache` instance lives on the :class:`SiftClient` (see
+``client._disk_cache``). Resources don't construct their own — they receive
+a reference and wrap it in a typed adapter that namespaces keys (e.g.
+``ChannelDataCache`` in ``low_level_wrappers/data.py``). The store itself
+is deliberately value-agnostic: callers hand in ``size_bytes`` for the
+oversize guard, ``diskcache`` pickles whatever object the caller supplied,
+and the store never needs to know what's inside.
+
+This module is the sibling of :mod:`.disk_cache_config` — the config
+holds user intent (enabled / path / max_bytes) and the store is the live
+handle keyed off that intent.
+
+Key behaviours pinned here so the adapter layer can stay thin:
+
+* Default path lives under :func:`tempfile.gettempdir` and is shared
+  across processes, so a fresh session reads previously-cached entries.
+* The byte cap is one global budget; LRU eviction spans all resources
+  sharing the store (channels, calculated channels, exports, ...).
+* :meth:`clear_disk` (classmethod) refuses to delete a directory that
+  doesn't look like a sift cache (no diskcache marker), so a typo'd
+  path can't take out the user's documents.
+* Oversized entries are skipped with a one-shot warning per key —
+  otherwise diskcache's eviction loop would drain every other row
+  trying to fit an unfittable entry.
+* Construction with ``disk_path=None`` (or after :meth:`disable_disk`)
+  is a silent no-op store. Callers don't need to branch on disabled
+  state; reads always miss and writes are dropped.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Iterator, cast
+
+if TYPE_CHECKING:
+    import diskcache
+
+logger = logging.getLogger(__name__)
+
+
+class DiskCache:
+    """Client-owned disk-backed key/value store shared by all resources.
+
+    Wraps a :class:`diskcache.Cache` with the lifecycle management and
+    safety rails sift resources rely on. The instance is shared — each
+    resource adapter namespaces its keys (e.g. ``channel:<id>``) so multiple
+    resources can write to the same store without colliding.
+
+    When ``disk_path`` is ``None``, the instance is a silent no-op: every
+    ``get`` misses, every ``put`` is dropped, and ``__contains__`` is
+    always ``False``. This lets callers treat "caching disabled" the same
+    as a cold cache, with no branching needed at the read/write site.
+
+    Args:
+        disk_path: Directory to persist to. ``None`` keeps the store
+            disabled. A previously-populated directory is reused, so a
+            fresh process reading the same path sees existing entries.
+        disk_max_bytes: Byte cap on the store. ``None`` falls back to
+            :attr:`DEFAULT_DISK_MAX_BYTES`. Ignored when ``disk_path``
+            is ``None``.
+    """
+
+    #: Default directory for the shared cache. Lives under
+    #: :func:`tempfile.gettempdir` so it survives across sessions of the
+    #: same user but doesn't pollute the home directory. The suffix is
+    #: fixed so multiple ``SiftClient`` instances naturally share the
+    #: same store and pick up each other's prior sessions.
+    DEFAULT_DISK_PATH: str = os.path.join(tempfile.gettempdir(), "sift-data-cache")
+
+    #: Default byte cap when :meth:`enable_disk` is called without an
+    #: explicit ``max_bytes``. 4 GiB is generous for the typical ``/tmp``
+    #: filesystem; ``diskcache`` enforces the cap with its own SQLite-
+    #: backed LRU eviction once the bound is reached.
+    DEFAULT_DISK_MAX_BYTES: int = 4 * 1024 * 1024 * 1024
+
+    #: Marker file ``diskcache`` writes inside every cache directory. The
+    #: classmethod :meth:`clear_disk` checks for this before any
+    #: ``shutil.rmtree`` so a typo'd path can't wipe out an unrelated
+    #: directory.
+    _DISKCACHE_MARKER: str = "cache.db"
+
+    def __init__(
+        self,
+        *,
+        disk_path: str | os.PathLike[str] | None = None,
+        disk_max_bytes: int | None = None,
+    ):
+        # Keys we've already logged an "entry exceeds disk cap" warning
+        # for. Tracks the full namespaced key (e.g. ``channel:foo``), not
+        # the resource-side id, so two adapters that happen to share an
+        # id space don't collide on dedup. A successful normal put
+        # clears the bit so a future regression re-warns.
+        self._oversized_warned: set[str] = set()
+        self._disk: diskcache.Cache | None = None
+        self._disk_path: str | None = None
+        self._disk_max_bytes: int | None = None
+        if disk_path is not None:
+            self._open_disk(
+                str(disk_path),
+                disk_max_bytes if disk_max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES,
+            )
+
+    @classmethod
+    def clear_disk(cls, path: str | os.PathLike[str] | None = None) -> None:
+        """Delete a previously-persisted on-disk cache directory.
+
+        Use this to drop stale caches from previous sessions, recover
+        from a corrupt cache, or reclaim disk space. The directory is
+        removed entirely; a future :meth:`enable_disk` call at the same
+        path opens a fresh empty cache.
+
+        Args:
+            path: Directory of the cache to clear. ``None`` (the default)
+                targets :attr:`DEFAULT_DISK_PATH`.
+
+        Raises:
+            ValueError: If ``path`` exists but does not look like a sift
+                cache directory (missing the ``diskcache`` marker file).
+                The guard makes accidental misuse a hard error rather
+                than silent data loss.
+        """
+        target = Path(path) if path is not None else Path(cls.DEFAULT_DISK_PATH)
+        if not target.exists():
+            return
+        if not (target / cls._DISKCACHE_MARKER).exists():
+            raise ValueError(
+                f"{str(target)!r} does not look like a sift data cache "
+                f"directory (missing {cls._DISKCACHE_MARKER!r} marker). "
+                f"Refusing to delete."
+            )
+        shutil.rmtree(target)
+
+    @property
+    def disk_enabled(self) -> bool:
+        """Whether a disk handle is currently open."""
+        return self._disk is not None
+
+    @property
+    def disk_path(self) -> str | None:
+        """Filesystem path of the cache when enabled, else ``None``."""
+        return self._disk_path
+
+    @property
+    def disk_max_bytes(self) -> int | None:
+        """Configured byte cap on disk usage, or ``None`` when disabled."""
+        return self._disk_max_bytes
+
+    def __contains__(self, key: str) -> bool:
+        """True if ``key`` is cached. Always ``False`` when disabled."""
+        if self._disk is None:
+            return False
+        return key in self._disk
+
+    def __iter__(self) -> Iterator[str]:
+        """Yield cached keys. Lets adapters scope a clear to their prefix.
+
+        Yields nothing when disabled. The underlying diskcache iterator
+        is snapshot-style, but callers that intend to mutate during
+        iteration should still wrap with ``list(...)`` to be safe.
+
+        ``diskcache.Cache`` is typed as yielding ``bytes | str | ...``
+        because it supports arbitrary key types; the cast narrows to the
+        ``str`` contract this layer enforces. Adapters never write
+        non-string keys.
+        """
+        if self._disk is None:
+            return
+        for key in self._disk:
+            yield cast("str", key)
+
+    def enable_disk(
+        self,
+        *,
+        path: str | os.PathLike[str] | None = None,
+        max_bytes: int | None = None,
+    ) -> None:
+        """Open the disk handle, replacing any previous one.
+
+        Reconfiguring to a different ``path`` or ``max_bytes`` closes the
+        prior handle first. Existing entries at the new path become
+        available via :meth:`get` without further setup.
+
+        Args:
+            path: Directory to persist to. ``None`` uses
+                :attr:`DEFAULT_DISK_PATH`.
+            max_bytes: Byte cap (``None`` → :attr:`DEFAULT_DISK_MAX_BYTES`).
+        """
+        target_path = str(path) if path is not None else self.DEFAULT_DISK_PATH
+        target_max = max_bytes if max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES
+        if (
+            self._disk is not None
+            and self._disk_path == target_path
+            and self._disk_max_bytes == target_max
+        ):
+            return
+        self._close_disk()
+        self._open_disk(target_path, target_max)
+
+    def disable_disk(self) -> None:
+        """Close the disk handle (if open). Does not touch on-disk contents.
+
+        Use :meth:`clear_disk` to remove a directory from disk.
+        """
+        self._close_disk()
+
+    def get(self, key: str) -> Any | None:
+        """Return the cached value for ``key`` or ``None`` on a miss.
+
+        Returns ``None`` for misses, decoded values for hits, and ``None``
+        (after self-invalidating the row) for corrupt entries surfaced
+        by ``diskcache`` as ``sqlite3.DatabaseError`` or similar. The
+        caller is expected to ``isinstance``-check the result against
+        the type they wrote.
+        """
+        if self._disk is None:
+            return None
+        try:
+            return self._disk.get(key, default=None, retry=True)
+        except Exception:
+            # diskcache surfaces ``sqlite3.DatabaseError`` (and friends)
+            # for corrupt or partially-written entries from a prior
+            # session. Log the cause, treat as a miss, and force-drop the
+            # bad row so we don't repeatedly trip the same path.
+            logger.warning("disk cache read failed for %s; invalidating", key, exc_info=True)
+            try:
+                del self._disk[key]
+            except Exception:
+                pass
+            return None
+
+    def put(self, key: str, value: Any, *, size_bytes: int) -> None:
+        """Write ``value`` under ``key``. No-op when disabled.
+
+        Entries whose ``size_bytes`` exceeds :attr:`disk_max_bytes` are
+        skipped with a one-shot warning per key, since diskcache's
+        eviction loop would otherwise drain every other row trying — and
+        failing — to fit an oversized entry. Callers are responsible
+        for measuring the size; the store stays value-agnostic.
+
+        Args:
+            key: Namespaced key (e.g. ``"channel:<id>"``). Adapters are
+                responsible for picking a prefix that won't collide with
+                other adapters writing to the same store.
+            value: Anything ``diskcache`` can pickle.
+            size_bytes: Caller-measured size used for the oversize guard.
+        """
+        if self._disk is None:
+            return
+        if self._disk_max_bytes is not None and size_bytes > self._disk_max_bytes:
+            if key not in self._oversized_warned:
+                logger.warning(
+                    "Entry for %s (%d bytes) is larger than the disk "
+                    "cache cap (%d bytes); skipping disk cache for this "
+                    "entry so other entries aren't evicted. Raise the "
+                    "cap via ``client.cache.enable_disk(max_bytes=...)`` "
+                    "to cache this entry on disk.",
+                    key,
+                    size_bytes,
+                    self._disk_max_bytes,
+                )
+                self._oversized_warned.add(key)
+            try:
+                self._disk.delete(key, retry=True)
+            except Exception:
+                pass
+            return
+        try:
+            self._disk.set(key, value, retry=True)
+            self._oversized_warned.discard(key)
+        except Exception:
+            # Best-effort persistence: keep going on disk errors so the
+            # caller's request still succeeds. Log the cause, then drop
+            # the (possibly partial) disk row.
+            logger.warning("disk cache write failed for %s; invalidating", key, exc_info=True)
+            try:
+                self._disk.delete(key, retry=True)
+            except Exception:
+                pass
+
+    def invalidate(self, key: str) -> None:
+        """Drop ``key`` from the store; a missing key is not an error."""
+        # Reset the oversize warning so the next put re-checks the cap and re-warns.
+        self._oversized_warned.discard(key)
+        if self._disk is None:
+            return
+        try:
+            self._disk.delete(key, retry=True)
+        except Exception:
+            pass
+
+    def clear(self) -> None:
+        """Wipe every entry from the store. The directory itself remains.
+
+        Spans all adapters sharing the store — typically used at test
+        teardown or for full reset. To wipe a single namespace, iterate
+        ``self`` and call :meth:`invalidate` on matching keys. Retries on
+        SQLite lock timeouts, matching the other store operations.
+        """
+        self._oversized_warned.clear()
+        if self._disk is not None:
+            self._disk.clear(retry=True)
+
+    def close(self) -> None:
+        """Release the disk file handle; a no-op when no handle is open."""
+        self._close_disk()
+
+    def _open_disk(self, path: str, max_bytes: int) -> None:
+        """Create ``path`` if needed and open an LRU ``diskcache.Cache`` there."""
+        import diskcache
+
+        os.makedirs(path, exist_ok=True)
+        self._disk = diskcache.Cache(
+            directory=path,
+            size_limit=max_bytes,
+            eviction_policy="least-recently-used",
+            statistics=0,
+            tag_index=0,
+        )
+        self._disk_path, self._disk_max_bytes = path, max_bytes
+
+    def _close_disk(self) -> None:
+        """Close the handle, if any, and reset all disk state to ``None``."""
+        if self._disk is None:
+            return
+        # Best-effort: a failing close must still leave the store disabled.
+        try:
+            self._disk.close()
+        except Exception:
+            pass
+        self._disk = self._disk_path = self._disk_max_bytes = None
diff --git a/python/lib/sift_client/_internal/low_level_wrappers/data.py b/python/lib/sift_client/_internal/low_level_wrappers/data.py
index 03ab29268..c524a9e03 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/data.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/data.py
@@ -2,11 +2,7 @@
 
 import asyncio
 import logging
-import os
-import shutil
-import tempfile
 from datetime import datetime, timezone
-from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
 import pandas as pd
@@ -20,14 +16,13 @@
 )
 from sift.data.v2.data_pb2_grpc import DataServiceStub
 
+from sift_client._internal.disk_cache import DiskCache
 from sift_client._internal.low_level_wrappers.base import LowLevelClientBase
 from sift_client._internal.time import to_timestamp_nanos
 from sift_client.sift_types.channel import Channel, ChannelDataType
 from sift_client.transport import WithGrpcClient
 
 if TYPE_CHECKING:
-    import diskcache
-
     from sift_client.transport.grpc_transport import GrpcClient
 
 # Configure logging
@@ -60,283 +55,100 @@ def _new_cache_entry(
     )
 
 
-class ChannelCache:
-    """Disk-backed cache of per-channel DataFrames.
-
-    A ``diskcache``-backed key/value store that survives process restarts.
-    ``put`` writes through to disk, ``get`` reads from disk, and
-    ``invalidate``/``clear`` remove entries. The disk tier has a byte cap
-    that ``diskcache`` enforces with its own LRU eviction.
-
-    When no ``disk_path`` is supplied the cache is a no-op: ``get`` always
-    returns ``None``, ``__contains__`` is always ``False``, and ``put`` is
-    silently dropped. This is the "caching disabled" mode used after a
-    :meth:`disable_disk` call (or when disk persistence is turned off on
-    the owning resource).
-
-    An in-memory tier previously sat in front of disk. It was removed once
-    benchmarks showed that for the workloads driving the OOM regression the
-    extra memory footprint outweighed the per-call pickle/deserialize cost
-    on a warm disk hit; if profiling shows the disk reads dominating again,
-    re-introduce a small front cache here.
+class ChannelDataCache:
+    """Channel-side adapter over the shared :class:`DiskCache` store.
+
+    The store is owned by :class:`~sift_client.client.SiftClient` and
+    shared by every cache-aware resource; this adapter is the typed,
+    namespaced view of it that the channel data path uses.
+
+    Responsibilities the adapter holds onto:
+
+    * **Key namespacing.** Every read/write goes through :meth:`_key`,
+      which prefixes the channel id with ``channel:``. That keeps a
+      future calculated-channels or exports adapter on the same store
+      from colliding on raw resource ids.
+    * **Typing.** ``put`` only accepts :class:`ChannelCacheEntry`;
+      ``get`` ``isinstance``-checks the raw value before handing it back,
+      so a corrupt or cross-adapter row reads as a miss instead of
+      blowing up downstream pandas code.
+    * **Size measurement.** The store stays value-agnostic; the adapter
+      already computes ``size_bytes`` on the entry via
+      :func:`_new_cache_entry` (``DataFrame.memory_usage(deep=True)``) so
+      it just forwards that to the store's oversize guard.
+    * **Resource-side state.** :attr:`name_id_map` lives here because
+      it's channel-specific bookkeeping needed to wire raw fetch
+      responses (keyed by channel *name*) back to the cache (keyed by
+      channel *id*).
+
+    The :class:`DiskCacheAdapter` ``Protocol`` is intentionally not
+    declared yet — there's only one adapter shape so far. When a second
+    resource grows its own adapter, extract the Protocol from the two
+    real shapes rather than guessing from one.
     """
 
-    #: Default directory for the cache. Lives under ``tempfile.gettempdir()``
-    #: so it survives across sessions of the same user but doesn't pollute
-    #: the user's home dir. The suffix is fixed so multiple processes
-    #: (different ``SiftClient`` instances, notebooks, etc.) naturally share
-    #: the same store and can read each other's prior sessions.
-    DEFAULT_DISK_PATH: str = os.path.join(tempfile.gettempdir(), "sift-channel-data-cache")
-
-    #: Default byte cap for the cache when ``enable_disk`` is called without
-    #: an explicit ``max_bytes``. 4 GiB is a generous ceiling for the typical
-    #: ``/tmp`` filesystem; ``diskcache`` enforces it with its own SQLite-
-    #: backed LRU eviction once the bound is reached.
-    DEFAULT_DISK_MAX_BYTES: int = 4 * 1024 * 1024 * 1024
+    #: Namespace prefix for keys this adapter writes to the shared
+    #: :class:`DiskCache`. Picked at class scope so adapters in other
+    #: resources can pick distinct prefixes without runtime negotiation.
+    KEY_PREFIX: str = "channel:"
 
-    #: Marker file ``diskcache`` writes inside every cache directory. We
-    #: sanity-check for this before any ``shutil.rmtree`` so a typo in the
-    #: ``clear_disk`` ``path`` argument can't wipe out an unrelated directory.
-    _DISKCACHE_MARKER: str = "cache.db"
-
-    def __init__(
-        self,
-        *,
-        disk_path: str | os.PathLike[str] | None = None,
-        disk_max_bytes: int | None = None,
-    ):
-        """Construct a disk-backed cache.
+    def __init__(self, store: DiskCache):
+        """Wrap ``store`` with channel-data semantics.
 
         Args:
-            disk_path: Directory for the cache. ``None`` disables caching
-                entirely (every operation becomes a no-op). A previously-
-                populated directory is reused, so subsequent sessions can
-                read existing entries.
-            disk_max_bytes: Byte cap on disk usage. ``None`` falls back to
-                ``DEFAULT_DISK_MAX_BYTES``. Ignored when ``disk_path`` is
-                ``None``.
+            store: The shared :class:`DiskCache` instance owned by the
+                :class:`SiftClient`. Multiple adapters may share one store.
         """
+        self._store = store
         self.name_id_map: dict[str, str] = {}
-        # Channels we've already logged an "entry exceeds disk cap" warning
-        # for. The check on the put path would otherwise spam the log once
-        # per ``get_data`` call for any channel whose typical entry is bigger
-        # than the cap. A successful normal put for the same channel clears
-        # the bit so a future regression re-warns.
-        self._oversized_disk_warned: set[str] = set()
-        self._disk: diskcache.Cache | None = None
-        self._disk_path: str | None = None
-        self._disk_max_bytes: int | None = None
-        if disk_path is not None:
-            self._open_disk(
-                str(disk_path),
-                disk_max_bytes if disk_max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES,
-            )
-
-    @classmethod
-    def clear_disk(cls, path: str | os.PathLike[str] | None = None) -> None:
-        """Delete a previously-persisted on-disk cache directory.
-
-        Use this to drop stale caches from previous sessions, recover from a
-        corrupt cache, or reclaim disk space. The directory is removed
-        entirely; a future ``enable_disk`` call at the same path will see a
-        fresh empty cache.
-
-        Args:
-            path: Directory of the cache to clear. ``None`` (the default)
-                targets :attr:`DEFAULT_DISK_PATH`.
-
-        Raises:
-            ValueError: If ``path`` exists but does not look like a sift
-                channel data cache directory (missing the ``diskcache``
-                marker file). This guard makes accidental misuse a hard
-                error rather than silent data loss.
-        """
-        target = Path(path) if path is not None else Path(cls.DEFAULT_DISK_PATH)
-        if not target.exists():
-            return
-        if not (target / cls._DISKCACHE_MARKER).exists():
-            raise ValueError(
-                f"{str(target)!r} does not look like a sift channel data cache "
-                f"directory (missing {cls._DISKCACHE_MARKER!r} marker). "
-                f"Refusing to delete."
-            )
-        shutil.rmtree(target)
-
-    @property
-    def disk_enabled(self) -> bool:
-        """Whether the disk-backed store is currently open."""
-        return self._disk is not None
 
-    @property
-    def disk_path(self) -> str | None:
-        """Filesystem path of the cache when enabled, else ``None``."""
-        return self._disk_path
+    def _key(self, channel_id: str) -> str:
+        return f"{self.KEY_PREFIX}{channel_id}"
 
     @property
-    def disk_max_bytes(self) -> int | None:
-        """Configured byte cap on disk usage, or ``None`` when disabled."""
-        return self._disk_max_bytes
+    def store(self) -> DiskCache:
+        """The shared underlying store. Tests reach in for store-level state."""
+        return self._store
 
     def __contains__(self, channel_id: str) -> bool:
-        """True if the channel is cached on disk.
+        """True if the channel is cached. False when the store is disabled."""
+        return self._key(channel_id) in self._store
 
-        Used by ``_filter_cached_channels`` to decide whether ``get_data``
-        needs to hit the wire. A warm disk lets a fresh session avoid
-        re-fetching previously-served windows.
-        """
-        if self._disk is None:
-            return False
-        return channel_id in self._disk
-
-    def enable_disk(
-        self,
-        *,
-        path: str | os.PathLike[str] | None = None,
-        max_bytes: int | None = None,
-    ) -> None:
-        """Enable (or reconfigure) the disk-backed cache.
-
-        If a previous disk handle was open at a different path or with a
-        different size cap, it's closed first. Disk contents at the new
-        path are NOT recreated from anywhere — only future writes land in
-        the new location.
+    def get(self, channel_id: str) -> ChannelCacheEntry | None:
+        """Return the entry for ``channel_id`` if cached, otherwise None.
 
-        Args:
-            path: Directory to persist to. ``None`` uses
-                :attr:`DEFAULT_DISK_PATH`. The directory is created if
-                missing; an existing one is opened in place and its
-                contents become available to ``get``.
-            max_bytes: Byte cap on disk usage (``None`` →
-                :attr:`DEFAULT_DISK_MAX_BYTES`).
+        Type-checks the raw value before returning so a row written by a
+        different adapter (or a corrupt entry that survived) reads as a
+        miss instead of being handed back as the wrong type.
         """
-        target_path = str(path) if path is not None else self.DEFAULT_DISK_PATH
-        target_max = max_bytes if max_bytes is not None else self.DEFAULT_DISK_MAX_BYTES
-        if (
-            self._disk is not None
-            and self._disk_path == target_path
-            and self._disk_max_bytes == target_max
-        ):
-            return
-        self._close_disk()
-        self._open_disk(target_path, target_max)
-
-    def disable_disk(self) -> None:
-        """Close the disk handle (if open). Does not touch the disk contents.
-
-        Use ``client.channels.clear_data_cache_on_disk(path)`` to remove a
-        directory from disk.
-        """
-        self._close_disk()
-
-    def get(self, channel_id: str) -> ChannelCacheEntry | None:
-        """Return the entry for ``channel_id`` if cached, otherwise None."""
-        if self._disk is None:
-            return None
-        try:
-            disk_entry = self._disk.get(channel_id, default=None, retry=True)
-        except Exception:
-            # diskcache surfaces ``sqlite3.DatabaseError`` (and friends) for
-            # corrupt or partially-written entries from a prior session.
-            # Treat as a miss; force-drop the bad row so we don't repeatedly
-            # trip the same path.
-            logger.warning("disk cache read failed for %s; invalidating", channel_id)
-            try:
-                del self._disk[channel_id]
-            except Exception:
-                pass
-            return None
-        if disk_entry is None or not isinstance(disk_entry, ChannelCacheEntry):
+        raw = self._store.get(self._key(channel_id))
+        if not isinstance(raw, ChannelCacheEntry):
             return None
-        return disk_entry
+        return raw
 
     def put(self, channel_id: str, entry: ChannelCacheEntry) -> None:
         """Insert or replace ``channel_id`` on disk.
 
-        No-op when the disk tier is disabled. Entries larger than
-        ``disk_max_bytes`` are skipped (with a one-shot warning per
-        channel) instead of being inserted, since diskcache's eviction
-        loop would otherwise drain every other row trying — and failing —
-        to fit them.
+        Forwards :attr:`ChannelCacheEntry.size_bytes` to the store so its
+        oversize guard can decide whether to write or skip+warn. No-op
+        when the underlying store is disabled.
         """
-        if self._disk is None:
-            return
-        if self._disk_max_bytes is not None and entry.size_bytes > self._disk_max_bytes:
-            if channel_id not in self._oversized_disk_warned:
-                logger.warning(
-                    "Channel %s data (%d bytes) is larger than the disk "
-                    "cache cap (%d bytes); skipping disk cache for this "
-                    "channel so other entries aren't evicted. Raise the "
-                    "cap via ``client.channels.enable_data_cache_disk("
-                    "max_bytes=...)`` to cache this channel on disk.",
-                    channel_id,
-                    entry.size_bytes,
-                    self._disk_max_bytes,
-                )
-                self._oversized_disk_warned.add(channel_id)
-            try:
-                self._disk.delete(channel_id, retry=True)
-            except Exception:
-                pass
-            return
-        try:
-            self._disk.set(channel_id, entry, retry=True)
-            self._oversized_disk_warned.discard(channel_id)
-        except Exception:
-            # Best-effort persistence: keep going on disk errors so the
-            # user's ``get_data`` call still succeeds. Drop the (possibly
-            # partial) disk row.
-            logger.warning("disk cache write failed for %s; invalidating", channel_id)
-            try:
-                self._disk.delete(channel_id, retry=True)
-            except Exception:
-                pass
+        self._store.put(self._key(channel_id), entry, size_bytes=entry.size_bytes)
 
     def invalidate(self, channel_id: str) -> None:
-        """Remove ``channel_id`` from the cache. Safe to call when absent."""
-        # Invalidation is a fresh start for this channel; if it was warned
-        # about as oversized previously, the next put should re-evaluate
-        # against the current cap and re-warn if still too big.
-        self._oversized_disk_warned.discard(channel_id)
-        if self._disk is not None:
-            try:
-                self._disk.delete(channel_id, retry=True)
-            except Exception:
-                pass
+        """Remove ``channel_id`` from the cache. Safe when absent."""
+        self._store.invalidate(self._key(channel_id))
 
     def clear(self) -> None:
-        """Wipe all entries from disk. The directory itself remains."""
-        self._oversized_disk_warned.clear()
-        if self._disk is not None:
-            self._disk.clear()
-
-    def close(self) -> None:
-        """Release the disk file handle. Safe to call without disk enabled."""
-        self._close_disk()
-
-    def _open_disk(self, path: str, max_bytes: int) -> None:
-        import diskcache
-
-        os.makedirs(path, exist_ok=True)
-        self._disk = diskcache.Cache(
-            directory=path,
-            size_limit=max_bytes,
-            eviction_policy="least-recently-used",
-            statistics=0,
-            tag_index=0,
-        )
-        self._disk_path = path
-        self._disk_max_bytes = max_bytes
+        """Wipe every channel entry. Other adapters' entries are preserved.
 
-    def _close_disk(self) -> None:
-        if self._disk is None:
-            return
-        try:
-            self._disk.close()
-        except Exception:
-            pass
-        self._disk = None
-        self._disk_path = None
-        self._disk_max_bytes = None
+        Walks the shared store's keyspace once and drops anything under
+        :attr:`KEY_PREFIX`. ``list(...)`` snapshots the iterator since
+        we mutate during iteration.
+        """
+        for key in list(self._store):
+            if key.startswith(self.KEY_PREFIX):
+                self._store.invalidate(key)
 
 
 class DataLowLevelClient(LowLevelClientBase, WithGrpcClient):
@@ -349,24 +161,25 @@ def __init__(
         self,
         grpc_client: GrpcClient,
         *,
-        disk_cache_path: str | os.PathLike[str] | None = None,
-        disk_cache_max_bytes: int | None = None,
+        channel_cache: ChannelDataCache | None = None,
     ):
         """Initialize the DataLowLevelClient.
 
         Args:
             grpc_client: The gRPC client to use for making API calls.
-            disk_cache_path: Directory for the disk-backed channel-data cache.
-                ``None`` disables caching entirely. See ``ChannelCache``.
-            disk_cache_max_bytes: Byte cap for disk usage. ``None`` uses
-                ``ChannelCache.DEFAULT_DISK_MAX_BYTES``. Ignored when
-                ``disk_cache_path`` is ``None``.
+            channel_cache: Adapter wrapping the shared :class:`DiskCache` the
+                :class:`SiftClient` owns. When ``None`` (only the unit-test
+                construction path), the wrapper falls back to a no-op store
+                so cache reads/writes are silent. Production callers always
+                pass an adapter built from ``client._get_disk_cache()``.
         """
         super().__init__(grpc_client)
-        self.channel_cache = ChannelCache(
-            disk_path=disk_cache_path,
-            disk_max_bytes=disk_cache_max_bytes,
-        )
+        # Production wires the shared store in via the resource. The fallback
+        # here lets a bare ``DataLowLevelClient(MagicMock())`` keep working
+        # in unit tests without forcing every site to plumb a store.
+        if channel_cache is None:
+            channel_cache = ChannelDataCache(DiskCache())
+        self.channel_cache = channel_cache
 
     def _update_name_id_map(self, channels: list[Channel]):
         """Update the name id map with the new channels."""
diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
index 4fc094440..6e28bd2bd 100644
--- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
+++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_data.py
@@ -2,11 +2,9 @@
 
 Four classes, narrowest scope first:
 
-* :class:`TestChannelCache` — disk-backed :class:`ChannelCache` unit tests
-  (fresh open, cross-session reload, invalidate/clear, oversized guards,
-  disable/reconfigure).
-* :class:`TestChannelCacheClearDisk` — ``ChannelCache.clear_disk``
-  classmethod (default path, custom path, safety guard).
+* :class:`TestChannelDataCache` — the typed adapter over the shared
+  :class:`DiskCache`. Covers key namespacing, the isinstance guard on
+  ``get``, and the prefix-scoped ``clear``.
 * :class:`TestMergePages` — ``DataLowLevelClient._merge_pages``, the
   per-channel concat helper.
 * :class:`TestDataLowLevelClient` — constructor wiring and per-instance
@@ -14,14 +12,18 @@
 * :class:`TestGetChannelData` — end-to-end on the public
   ``get_channel_data`` API against a mocked ``_get_data_impl``.
 
-The OOM regression that motivated this code happened because the cache was
-a class attribute that grew without bound. ``test_per_instance_isolation``
-is the canary that catches anyone re-introducing that pattern.
+Storage-layer behaviour (oversize guards, marker-checked clear,
+cross-session reload) lives in ``_tests/_internal/test_disk_cache.py``;
+this file stays focused on the channel-data path.
+
+The OOM regression that motivated this code happened because the cache
+was a class attribute that grew without bound. ``test_per_instance_isolation``
+is the canary that catches anyone re-introducing that pattern, even though
+ownership has since moved to the client.
 """
 
 from __future__ import annotations
 
-import logging
 from contextlib import contextmanager
 from datetime import datetime, timedelta, timezone
 from typing import Any, Iterator
@@ -30,9 +32,10 @@
 import pandas as pd
 import pytest
 
+from sift_client._internal.disk_cache import DiskCache
 from sift_client._internal.low_level_wrappers.data import (
-    ChannelCache,
     ChannelCacheEntry,
+    ChannelDataCache,
     DataLowLevelClient,
     _new_cache_entry,
 )
@@ -41,13 +44,6 @@
 _NOW = datetime(2025, 1, 1, tzinfo=timezone.utc)
 _WINDOW_END = _NOW + timedelta(days=1)
 
-# Snapshot of the real ``DEFAULT_DISK_PATH`` constant captured at module import.
-# The autouse ``_isolate_default_disk_cache_path`` fixture in ``conftest.py``
-# overrides the class attribute on every test for isolation; the
-# ``TestChannelCacheClearDisk::test_default_path_constant_under_tmp`` test still
-# needs to see the production value to verify its shape.
-_PRODUCTION_DEFAULT_DISK_PATH = ChannelCache.DEFAULT_DISK_PATH
-
 
 # ---------- shared helpers -----------
 
@@ -97,39 +93,15 @@ def _channel(cid: str) -> Channel:
 
 
 def _client_with_cache(tmp_path, subdir: str = "cache") -> DataLowLevelClient:
-    """Build a ``DataLowLevelClient`` whose ``ChannelCache`` points at ``tmp_path``.
+    """Build a ``DataLowLevelClient`` whose adapter points at ``tmp_path``.
 
-    Tests that exercise cache behaviour (hits/misses/eviction) need an
-    actual disk-backed cache, so ``disk_cache_path`` must be supplied. A
-    plain ``DataLowLevelClient(MagicMock())`` defaults to no-cache mode
+    Tests that exercise cache behaviour (hits/misses) need an actual
+    disk-backed adapter, so the store has to be opened explicitly. A
+    plain ``DataLowLevelClient(MagicMock())`` defaults to a no-op store
     and would silently turn every cache test into a wire-path test.
     """
-    return DataLowLevelClient(MagicMock(), disk_cache_path=tmp_path / subdir)
-
-
-@contextmanager
-def _capture_data_warnings() -> Iterator[list[logging.LogRecord]]:
-    """Capture warnings emitted by the ``data`` module's logger directly.
-
-    Pytest's ``caplog`` reads from the root logger, but the Sift pytest plugin
-    sets ``propagate=False`` on the ``sift_client`` logger when audit logging
-    is active, so records emitted from any descendant don't reach the root.
-    Attaching a list-backed handler at the leaf logger bypasses that and
-    surfaces exactly the records we emit.
-    """
-    target = logging.getLogger("sift_client._internal.low_level_wrappers.data")
-    records: list[logging.LogRecord] = []
-
-    class _ListHandler(logging.Handler):
-        def emit(self, record: logging.LogRecord) -> None:
-            records.append(record)
-
-    handler = _ListHandler(level=logging.WARNING)
-    target.addHandler(handler)
-    try:
-        yield records
-    finally:
-        target.removeHandler(handler)
+    store = DiskCache(disk_path=tmp_path / subdir)
+    return DataLowLevelClient(MagicMock(), channel_cache=ChannelDataCache(store))
 
 
 def _patch_deserializer(sentinel_to_frames: dict[str, dict[str, pd.DataFrame]]) -> Any:
@@ -201,348 +173,140 @@ async def fake_impl(
 # ---------- tests -----------
 
 
-class TestChannelCache:
-    """Disk-backed :class:`ChannelCache` behaviour.
+class TestChannelDataCache:
+    """The typed adapter over the shared :class:`DiskCache`.
 
-    Five invariants must hold across these tests:
+    Three invariants get pinned:
 
-    1. Constructing without a ``disk_path`` yields a no-op cache (every
-       operation is silent; ``__contains__`` returns ``False``).
-    2. A fresh disk directory starts empty and accepts new writes.
-    3. Closing a populated cache and reopening at the same path surfaces
-       the previous entries on read (the "previous session" requirement
-       that powers cold-start reuse).
-    4. Oversized entries are skipped with a deduped warning rather than
-       being inserted and triggering an eviction storm.
-    5. ``invalidate``/``clear`` reset the oversized-warning dedup state
-       so a future regression re-warns.
+    1. Every operation routes through the namespaced key
+       (``channel:<id>``), so two adapters sharing one store don't
+       collide on bare resource ids.
+    2. :meth:`ChannelDataCache.get` returns ``None`` on a type-mismatch
+       hit (e.g. a row another adapter wrote) instead of handing
+       arbitrary objects to downstream pandas code.
+    3. :meth:`ChannelDataCache.clear` wipes only the adapter's namespace
+       — entries belonging to other adapters survive.
 
-    All tests confine writes to ``tmp_path`` so nothing leaks into the
-    real ``/tmp/sift-channel-data-cache``.
+    Store-level behaviour (oversized guards, cross-session reload,
+    marker-checked clear_disk) is exercised in ``test_disk_cache.py``.
     """
 
-    def test_disabled_when_no_path(self) -> None:
-        """``ChannelCache()`` with no ``disk_path`` is a silent no-op."""
-        cache = ChannelCache()
-        assert cache.disk_enabled is False
-        assert cache.disk_path is None
-        assert cache.disk_max_bytes is None
-        # Operations don't raise; the cache just stays empty.
-        cache.put("chan-1", _entry(rows=4))
-        assert "chan-1" not in cache
-        assert cache.get("chan-1") is None
-        cache.invalidate("chan-1")
-        cache.clear()
-        cache.close()
-
-    def test_fresh_cache_writes_and_reads(self, tmp_path) -> None:
-        """A fresh disk directory accepts writes and serves them back."""
-        path = tmp_path / "fresh"
-        cache = ChannelCache(disk_path=path)
+    def test_get_miss_returns_none(self, tmp_path):
+        adapter = ChannelDataCache(DiskCache(disk_path=tmp_path / "miss"))
+        try:
+            assert "c1" not in adapter
+            assert adapter.get("c1") is None
+        finally:
+            adapter.store.close()
+
+    def test_round_trip(self, tmp_path):
+        """Put then get returns an equivalent entry."""
+        adapter = ChannelDataCache(DiskCache(disk_path=tmp_path / "rt"))
         try:
-            assert cache.disk_enabled
-            assert cache.disk_path == str(path)
-            assert cache.disk_max_bytes == ChannelCache.DEFAULT_DISK_MAX_BYTES
             entry = _entry(rows=8)
-            cache.put("chan-1", entry)
-            assert "chan-1" in cache
-            got = cache.get("chan-1")
+            adapter.put("c1", entry)
+            assert "c1" in adapter
+            got = adapter.get("c1")
             assert got is not None
             pd.testing.assert_frame_equal(got.data, entry.data)
             assert got.start_time == entry.start_time
             assert got.end_time == entry.end_time
         finally:
-            cache.close()
-
-    def test_reopen_existing_dir_sees_prior_session_entries(self, tmp_path) -> None:
-        """Closing then reopening at the same path makes prior entries hit.
-
-        This is the "look for existing caches from previous sessions"
-        guarantee: a new ``ChannelCache`` at a populated directory finds
-        entries on disk and returns them on the next read.
-        """
-        path = tmp_path / "prev-session"
-        df = _frame("chan-1", rows=12, freq="s")
-        original_entry = _new_cache_entry(
-            data=df,
-            start_time=df.index[0].to_pydatetime(),
-            end_time=df.index[-1].to_pydatetime(),
-        )
-        # Session 1: populate and close.
-        session1 = ChannelCache(disk_path=path)
-        session1.put("chan-1", original_entry)
-        session1.close()
-
-        # Session 2: fresh process simulated by a brand-new ChannelCache
-        # at the same directory.
-        session2 = ChannelCache(disk_path=path)
-        try:
-            assert "chan-1" in session2
-            got = session2.get("chan-1")
-            assert got is not None
-            pd.testing.assert_frame_equal(got.data, original_entry.data)
-            assert got.start_time == original_entry.start_time
-            assert got.end_time == original_entry.end_time
-        finally:
-            session2.close()
-
-    def test_repeated_put_overwrites(self, tmp_path) -> None:
-        """A second ``put`` on the same key replaces the prior entry."""
-        cache = ChannelCache(disk_path=tmp_path / "overwrite")
-        try:
-            small = _entry(rows=10)
-            bigger = _entry(rows=100)
-            cache.put("chan", small)
-            cache.put("chan", bigger)
-            got = cache.get("chan")
-            assert got is not None
-            pd.testing.assert_frame_equal(got.data, bigger.data)
-        finally:
-            cache.close()
-
-    def test_invalidate_removes_entry(self, tmp_path) -> None:
-        """``invalidate`` drops the entry; safe to call when absent."""
-        cache = ChannelCache(disk_path=tmp_path / "inval")
-        try:
-            cache.invalidate("never_added")  # safe before any puts
-            cache.put("chan-1", _entry(rows=4))
-            cache.invalidate("chan-1")
-            assert "chan-1" not in cache
-            assert cache.get("chan-1") is None
-        finally:
-            cache.close()
-
-    def test_clear_wipes_disk(self, tmp_path) -> None:
-        cache = ChannelCache(disk_path=tmp_path / "clear")
-        try:
-            cache.put("chan-1", _entry(rows=4))
-            cache.put("chan-2", _entry(rows=4))
-            cache.clear()
-            assert "chan-1" not in cache
-            assert "chan-2" not in cache
-        finally:
-            cache.close()
-
-    def test_disable_disk_closes_handle(self, tmp_path) -> None:
-        """Turning off disk closes the handle and silences subsequent ops."""
-        cache = ChannelCache(disk_path=tmp_path / "disable")
-        try:
-            cache.put("chan-1", _entry(rows=4))
-            cache.disable_disk()
-            assert not cache.disk_enabled
-            assert cache.disk_path is None
-            assert "chan-1" not in cache  # no handle → no hits
-            assert cache.get("chan-1") is None
-            # Subsequent puts are silently dropped.
-            cache.put("chan-2", _entry(rows=4))
-            assert "chan-2" not in cache
-        finally:
-            cache.close()
+            adapter.store.close()
 
-    def test_enable_disk_reconfigures_path(self, tmp_path) -> None:
-        """Reconfiguring to a different path closes the old handle.
+    def test_writes_use_namespaced_key(self, tmp_path):
+        """The raw store sees ``channel:<id>``, not the bare id.
 
-        The new directory starts empty: ``chan-1`` lived in the old
-        directory's diskcache, so the lookup at the new path misses.
+        Pins the key-shape contract two adapters share. Without it, a
+        second adapter that happens to share an id with the channel
+        adapter would clobber the channel row.
         """
-        cache = ChannelCache(disk_path=tmp_path / "a")
+        store = DiskCache(disk_path=tmp_path / "ns")
+        adapter = ChannelDataCache(store)
         try:
-            cache.put("chan-1", _entry(rows=4))
-            cache.enable_disk(path=tmp_path / "b")
-            assert cache.disk_path == str(tmp_path / "b")
-            assert "chan-1" not in cache  # fresh directory
+            adapter.put("c1", _entry(rows=4))
+            assert "channel:c1" in store
+            assert "c1" not in store
         finally:
-            cache.close()
+            store.close()
 
-    def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
-        """Re-enabling with identical settings doesn't churn the disk handle."""
-        cache = ChannelCache(disk_path=tmp_path / "noop")
-        try:
-            handle_before = cache._disk
-            cache.enable_disk(path=tmp_path / "noop", max_bytes=ChannelCache.DEFAULT_DISK_MAX_BYTES)
-            assert cache._disk is handle_before
-        finally:
-            cache.close()
-
-    def test_oversized_entry_skips_cache_preserves_neighbours(self, tmp_path) -> None:
-        """An entry larger than the cap is skipped without evicting peers.
+    def test_get_isinstance_check_filters_foreign_rows(self, tmp_path):
+        """A row whose value isn't a ChannelCacheEntry reads as a miss.
 
-        Without this guard, ``diskcache``'s cull would evict every other
-        row trying to fit an unfittable entry, then drop the entry itself
-        — the wipe-everything failure mode the bounded-cache work
-        originally fixed. The disk-tier guard mirrors that fix.
-
-        Memory is sized to accept small entries but reject the oversized one
-        so memory-tier writes don't compete with disk-tier writes. We
-        assert on the disk ``_disk`` mapping directly because that's where
-        the contested behavior lives.
-
-        ``disk_max_bytes`` has to leave room for ``diskcache``'s pickle
-        envelope around each small entry (a few KB) AND be small enough
-        that the oversized entry trips the guard. Half the oversized
-        DataFrame's raw byte size hits both constraints comfortably.
+        Models a corrupt entry or a key collision from another writer.
+        ``ChannelDataCache.get`` must isinstance-check the raw value so
+        callers downstream never receive the wrong shape.
         """
-        small = _entry(rows=4)
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "disk-oversize",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
+        store = DiskCache(disk_path=tmp_path / "foreign")
+        adapter = ChannelDataCache(store)
         try:
-            cache.put("small-1", small)
-            cache.put("small-2", small)
-            assert cache._disk is not None
-            with _capture_data_warnings() as records:
-                cache.put("huge", oversized)
-            # Prior entries survive; oversized one was not written.
-            assert "small-1" in cache
-            assert "small-2" in cache
-            assert "huge" not in cache
-            assert any("larger than the disk cache cap" in r.getMessage() for r in records)
+            store.put("channel:c1", {"not": "an entry"}, size_bytes=64)
+            assert adapter.get("c1") is None
         finally:
-            cache.close()
-
-    def test_oversized_put_drops_prior_entry(self, tmp_path) -> None:
-        """An oversized re-insert must drop the prior slice, not silently keep it.
+            store.close()
 
-        Otherwise a stale subrange would masquerade as a hit on the next
-        ``get`` even though the caller's intent was to refresh the entry.
-        """
-        small = _entry(rows=4)
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "drop-prior",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
+    def test_invalidate_removes_entry(self, tmp_path):
+        adapter = ChannelDataCache(DiskCache(disk_path=tmp_path / "inval"))
         try:
-            cache.put("chan", small)
-            assert "chan" in cache
-            cache.put("chan", oversized)
-            assert "chan" not in cache
+            adapter.invalidate("never_added")  # safe before any puts
+            adapter.put("c1", _entry(rows=4))
+            adapter.invalidate("c1")
+            assert "c1" not in adapter
+            assert adapter.get("c1") is None
         finally:
-            cache.close()
+            adapter.store.close()
 
-    def test_oversized_put_warns_once_per_channel(self, tmp_path) -> None:
-        """Repeated oversized puts for the same channel log once, not on every call.
+    def test_clear_is_prefix_scoped(self, tmp_path):
+        """``clear`` drops channel rows but leaves other adapters' rows alone.
 
-        Without dedup, every ``get_data`` for an oversized channel would
-        write a fresh WARNING line — quickly drowning out other signal in
-        the logs.
+        Simulates a second resource writing to the same store with a
+        different prefix; the channel adapter's clear must not be a
+        whole-store wipe.
         """
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "dedup",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
+        store = DiskCache(disk_path=tmp_path / "scoped")
+        adapter = ChannelDataCache(store)
         try:
-            with _capture_data_warnings() as records:
-                for _ in range(5):
-                    cache.put("chan", oversized)
-            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
-            assert len(warnings) == 1
+            adapter.put("c1", _entry(rows=4))
+            adapter.put("c2", _entry(rows=4))
+            # Simulate a row written by a different adapter.
+            store.put("other:1", "foreign-value", size_bytes=64)
+            adapter.clear()
+            assert "c1" not in adapter
+            assert "c2" not in adapter
+            assert "other:1" in store
         finally:
-            cache.close()
+            store.close()
 
-    def test_oversized_warning_resets_after_normal_put(self, tmp_path) -> None:
-        """A successful normal-sized put clears the dedup bit.
+    def test_size_bytes_propagates_to_store(self, tmp_path):
+        """The adapter forwards the entry's ``size_bytes`` to the store guard.
 
-        Used by callers who narrow a time window after seeing the warning:
-        the next oversized regression should re-warn rather than stay silent.
+        The store cap is half the entry's ``size_bytes``, so the store's
+        oversize guard kicks in. The adapter never measures size itself;
+        it relies on ``_new_cache_entry`` having stamped the value.
         """
-        small = _entry(rows=4)
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "reset-after-normal",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
-        try:
-            with _capture_data_warnings() as records:
-                cache.put("chan", oversized)  # 1st warning
-                cache.put("chan", small)  # resets state
-                cache.put("chan", oversized)  # 2nd warning
-            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
-            assert len(warnings) == 2
-        finally:
-            cache.close()
-
-    def test_invalidate_resets_oversized_warning(self, tmp_path) -> None:
-        """``invalidate`` is a fresh start; the next oversized put re-warns."""
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "reset-invalidate",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
+        entry = _entry(rows=10_000)
+        store = DiskCache(disk_path=tmp_path / "size", disk_max_bytes=entry.size_bytes // 2)
+        adapter = ChannelDataCache(store)
         try:
-            with _capture_data_warnings() as records:
-                cache.put("chan", oversized)
-                cache.invalidate("chan")
-                cache.put("chan", oversized)
-            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
-            assert len(warnings) == 2
+            adapter.put("c1", entry)
+            assert "c1" not in adapter  # oversize skipped by the store
         finally:
-            cache.close()
-
-    def test_clear_resets_oversized_warning(self, tmp_path) -> None:
-        """``clear`` resets dedup state across channels."""
-        oversized = _entry(rows=10_000)
-        cache = ChannelCache(
-            disk_path=tmp_path / "reset-clear",
-            disk_max_bytes=oversized.size_bytes // 2,
-        )
-        try:
-            with _capture_data_warnings() as records:
-                cache.put("chan-a", oversized)
-                cache.put("chan-b", oversized)
-                cache.clear()
-                cache.put("chan-a", oversized)
-                cache.put("chan-b", oversized)
-            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
-            assert len(warnings) == 4
-        finally:
-            cache.close()
-
+            store.close()
 
-class TestChannelCacheClearDisk:
-    """``ChannelCache.clear_disk`` removes a cache dir, refuses other dirs.
+    def test_no_op_store_keeps_adapter_silent(self):
+        """An adapter on a disabled store behaves like a cold cache.
 
-    The classmethod is the source of truth that the resource-level
-    ``ChannelsAPIAsync.clear_data_cache_on_disk`` proxies through, so it
-    must be defensive against pointing at the wrong directory.
-    """
-
-    def test_clear_removes_directory(self, tmp_path) -> None:
-        path = tmp_path / "victim"
-        cache = ChannelCache(disk_path=path)
-        cache.put("chan-1", _entry(rows=4))
-        cache.close()
-        assert path.exists()
-        ChannelCache.clear_disk(path)
-        assert not path.exists()
-
-    def test_clear_missing_path_is_noop(self, tmp_path) -> None:
-        ChannelCache.clear_disk(tmp_path / "never-existed")  # no raise
-
-    def test_clear_refuses_non_diskcache_directory(self, tmp_path) -> None:
-        """A typo'd path with unrelated contents must not be wiped."""
-        target = tmp_path / "user-stuff"
-        target.mkdir()
-        (target / "important.txt").write_text("don't delete me")
-        with pytest.raises(ValueError, match="does not look like a sift channel data cache"):
-            ChannelCache.clear_disk(target)
-        assert (target / "important.txt").read_text() == "don't delete me"
-
-    def test_default_path_constant_under_tmp(self) -> None:
-        """Default lives under the OS tmp dir, not a user directory.
-
-        Reads the module-level snapshot captured at import time rather than
-        ``ChannelCache.DEFAULT_DISK_PATH`` directly, because the autouse
-        ``_isolate_default_disk_cache_path`` fixture monkeypatches that
-        attribute for every test to keep ``/tmp`` clean.
+        Disabling the store is the path ``client.cache.disable_disk()``
+        exercises; resources can keep their adapter reference and every
+        operation just no-ops.
         """
-        import tempfile
-
-        assert _PRODUCTION_DEFAULT_DISK_PATH.startswith(tempfile.gettempdir())
-        assert _PRODUCTION_DEFAULT_DISK_PATH.endswith("sift-channel-data-cache")
+        adapter = ChannelDataCache(DiskCache())
+        assert not adapter.store.disk_enabled
+        adapter.put("c1", _entry(rows=4))
+        assert "c1" not in adapter
+        assert adapter.get("c1") is None
+        adapter.invalidate("c1")
+        adapter.clear()
 
 
 class TestMergePages:
@@ -648,24 +412,24 @@ class TestDataLowLevelClient:
     :class:`TestGetChannelData`.
     """
 
-    def test_no_cache_when_disk_path_omitted(self) -> None:
-        """Default construction leaves the cache in no-op mode.
+    def test_default_construction_uses_no_op_store(self) -> None:
+        """Default construction leaves the adapter wrapping a disabled store.
 
-        The ``ChannelsAPIAsync`` resource is the public surface for
-        opting into disk persistence; the bare ``DataLowLevelClient``
-        keeps caching off so unit tests don't accidentally write to
-        ``/tmp`` just by instantiating the wrapper.
+        Resources wire the shared store in via the keyword arg; the
+        ``MagicMock()``-only path here keeps unit tests free of disk I/O.
         """
         client = DataLowLevelClient(MagicMock())
-        assert not client.channel_cache.disk_enabled
+        assert isinstance(client.channel_cache, ChannelDataCache)
+        assert not client.channel_cache.store.disk_enabled
 
     def test_per_instance_isolation(self, tmp_path) -> None:
-        """Two clients with separate disk paths must not share cache state.
+        """Two clients with distinct stores must not share cache state.
 
         Regression test for the original OOM bug: ``channel_cache`` was a
         class attribute, so every ``SiftClient`` in the process appended
-        to the same dict. Two fresh clients with distinct directories must
-        have independent caches.
+        to the same dict. Two fresh adapters over independent stores must
+        stay independent — even now that store ownership has moved to the
+        client, the contract is the same.
         """
         client_a = _client_with_cache(tmp_path, "a")
         client_b = _client_with_cache(tmp_path, "b")
@@ -674,23 +438,24 @@ class attribute, so every ``SiftClient`` in the process appended
             assert "c1" in client_a.channel_cache
             assert "c1" not in client_b.channel_cache
         finally:
-            client_a.channel_cache.close()
-            client_b.channel_cache.close()
-
-    def test_disk_cache_kwargs_propagate(self, tmp_path) -> None:
-        """Constructor kwargs land on the underlying ``ChannelCache``."""
-        path = tmp_path / "kwargs"
-        client = DataLowLevelClient(
-            MagicMock(),
-            disk_cache_path=path,
-            disk_cache_max_bytes=8_192,
-        )
+            client_a.channel_cache.store.close()
+            client_b.channel_cache.store.close()
+
+    def test_adapter_kwarg_propagates(self, tmp_path) -> None:
+        """The constructor honours an externally-constructed adapter.
+
+        Mirrors the production wiring where ``ChannelsAPIAsync`` builds
+        the adapter from ``client._get_disk_cache()`` and hands it in.
+        """
+        store = DiskCache(disk_path=tmp_path / "external", disk_max_bytes=8_192)
+        adapter = ChannelDataCache(store)
+        client = DataLowLevelClient(MagicMock(), channel_cache=adapter)
         try:
-            assert client.channel_cache.disk_enabled
-            assert client.channel_cache.disk_path == str(path)
-            assert client.channel_cache.disk_max_bytes == 8_192
+            assert client.channel_cache is adapter
+            assert client.channel_cache.store is store
+            assert client.channel_cache.store.disk_max_bytes == 8_192
         finally:
-            client.channel_cache.close()
+            store.close()
 
 
 class TestGetChannelData:
@@ -766,7 +531,7 @@ async def test_cache_hit_short_circuits_grpc(self, tmp_path) -> None:
                 )
             pd.testing.assert_frame_equal(first["c1"].sort_index(), second["c1"].sort_index())
         finally:
-            client.channel_cache.close()
+            client.channel_cache.store.close()
 
     @pytest.mark.asyncio
     async def test_partial_cache_hit_merges_cached_and_fresh(self, tmp_path) -> None:
@@ -799,7 +564,7 @@ async def test_partial_cache_hit_merges_cached_and_fresh(self, tmp_path) -> None
             pd.testing.assert_frame_equal(result["c1"].sort_index(), c1_df.sort_index())
             pd.testing.assert_frame_equal(result["c2"].sort_index(), c2_df.sort_index())
         finally:
-            client.channel_cache.close()
+            client.channel_cache.store.close()
 
     @pytest.mark.asyncio
     async def test_ignore_cache_true_returns_fresh_and_skips_write(self, tmp_path) -> None:
@@ -822,4 +587,4 @@ async def test_ignore_cache_true_returns_fresh_and_skips_write(self, tmp_path) -
             pd.testing.assert_frame_equal(result["c1"], df)
             assert "c1" not in client.channel_cache
         finally:
-            client.channel_cache.close()
+            client.channel_cache.store.close()
diff --git a/python/lib/sift_client/_tests/_internal/test_disk_cache.py b/python/lib/sift_client/_tests/_internal/test_disk_cache.py
new file mode 100644
index 000000000..66e57a9e6
--- /dev/null
+++ b/python/lib/sift_client/_tests/_internal/test_disk_cache.py
@@ -0,0 +1,343 @@
+"""Tests for :mod:`sift_client._internal.disk_cache`.
+
+Two classes, narrowest scope first:
+
+* :class:`TestDiskCache` — direct unit tests on :class:`DiskCache`:
+  the disabled-when-no-path no-op, fresh writes/reads, cross-session
+  reload, oversize guard + dedup keyed on the full namespaced key, and
+  the marker-guarded :meth:`DiskCache.clear_disk` classmethod.
+* :class:`TestClearDisk` — the classmethod's defensive guards.
+
+The store is intentionally key/value-agnostic — every test treats it as
+a plain ``str``-keyed dict that happens to persist across handles, with
+``size_bytes`` supplied by the caller. The channel-specific adapter
+(:class:`ChannelDataCache`) is exercised separately in ``test_data.py``.
+"""
+
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from typing import Iterator
+
+import pytest
+
+from sift_client._internal.disk_cache import DiskCache
+
+# Snapshot of the production constant captured at import time. The autouse
+# ``_isolate_default_disk_cache_path`` fixture in ``conftest.py`` overrides
+# the class attribute per test; the constant-shape test still needs the
+# real value to assert against.
+_PRODUCTION_DEFAULT_DISK_PATH = DiskCache.DEFAULT_DISK_PATH
+
+
+@contextmanager
+def _capture_disk_cache_warnings() -> Iterator[list[logging.LogRecord]]:
+    """Capture warnings emitted by the disk-cache logger directly.
+
+    Pytest's ``caplog`` reads from the root logger, but the Sift pytest
+    plugin sets ``propagate=False`` on the ``sift_client`` logger when
+    audit logging is active, so records emitted from any descendant don't
+    reach the root. Attaching a list-backed handler at the leaf logger
+    bypasses that.
+    """
+    target = logging.getLogger("sift_client._internal.disk_cache")
+    records: list[logging.LogRecord] = []
+
+    class _ListHandler(logging.Handler):
+        def emit(self, record: logging.LogRecord) -> None:
+            records.append(record)
+
+    handler = _ListHandler(level=logging.WARNING)
+    target.addHandler(handler)
+    try:
+        yield records
+    finally:
+        target.removeHandler(handler)
+
+
+class TestDiskCache:
+    """End-to-end behaviour of the shared on-disk store."""
+
+    def test_disabled_when_no_path(self) -> None:
+        """``DiskCache()`` with no ``disk_path`` is a silent no-op."""
+        cache = DiskCache()
+        assert cache.disk_enabled is False
+        assert cache.disk_path is None
+        assert cache.disk_max_bytes is None
+        # Every operation no-ops; no AttributeError, no warning.
+        cache.put("k", "v", size_bytes=4)
+        assert "k" not in cache
+        assert cache.get("k") is None
+        assert list(iter(cache)) == []
+        cache.invalidate("k")
+        cache.clear()
+        cache.close()
+
+    def test_fresh_cache_writes_and_reads(self, tmp_path) -> None:
+        """A fresh disk directory accepts writes and serves them back."""
+        cache = DiskCache(disk_path=tmp_path / "fresh")
+        try:
+            assert cache.disk_enabled
+            assert cache.disk_path == str(tmp_path / "fresh")
+            assert cache.disk_max_bytes == DiskCache.DEFAULT_DISK_MAX_BYTES
+            cache.put("k", {"hello": "world"}, size_bytes=64)
+            assert "k" in cache
+            assert cache.get("k") == {"hello": "world"}
+        finally:
+            cache.close()
+
+    def test_reopen_existing_dir_sees_prior_session_entries(self, tmp_path) -> None:
+        """Closing then reopening at the same path surfaces prior entries.
+
+        This is the cold-start reuse guarantee: a fresh process pointing
+        at a populated directory reads back what an earlier process wrote.
+        """
+        path = tmp_path / "prev-session"
+        session1 = DiskCache(disk_path=path)
+        session1.put("k", [1, 2, 3], size_bytes=24)
+        session1.close()
+
+        session2 = DiskCache(disk_path=path)
+        try:
+            assert "k" in session2
+            assert session2.get("k") == [1, 2, 3]
+        finally:
+            session2.close()
+
+    def test_repeated_put_overwrites(self, tmp_path) -> None:
+        cache = DiskCache(disk_path=tmp_path / "overwrite")
+        try:
+            cache.put("k", "first", size_bytes=8)
+            cache.put("k", "second", size_bytes=8)
+            assert cache.get("k") == "second"
+        finally:
+            cache.close()
+
+    def test_invalidate_removes_entry(self, tmp_path) -> None:
+        cache = DiskCache(disk_path=tmp_path / "inval")
+        try:
+            cache.invalidate("never_added")  # safe before any puts
+            cache.put("k", "v", size_bytes=4)
+            cache.invalidate("k")
+            assert "k" not in cache
+            assert cache.get("k") is None
+        finally:
+            cache.close()
+
+    def test_clear_wipes_store(self, tmp_path) -> None:
+        cache = DiskCache(disk_path=tmp_path / "clear")
+        try:
+            cache.put("a", 1, size_bytes=8)
+            cache.put("b", 2, size_bytes=8)
+            cache.clear()
+            assert "a" not in cache
+            assert "b" not in cache
+        finally:
+            cache.close()
+
+    def test_iter_yields_keys(self, tmp_path) -> None:
+        """``__iter__`` exposes the keyspace so adapters can prefix-clear."""
+        cache = DiskCache(disk_path=tmp_path / "iter")
+        try:
+            cache.put("alpha:1", 1, size_bytes=8)
+            cache.put("beta:1", 2, size_bytes=8)
+            cache.put("alpha:2", 3, size_bytes=8)
+            assert set(cache) == {"alpha:1", "alpha:2", "beta:1"}
+        finally:
+            cache.close()
+
+    def test_disable_disk_closes_handle(self, tmp_path) -> None:
+        """Turning off disk closes the handle and silences subsequent ops."""
+        cache = DiskCache(disk_path=tmp_path / "disable")
+        try:
+            cache.put("k", "v", size_bytes=4)
+            cache.disable_disk()
+            assert not cache.disk_enabled
+            assert cache.disk_path is None
+            assert "k" not in cache
+            assert cache.get("k") is None
+            cache.put("new", "x", size_bytes=4)  # silently dropped
+            assert "new" not in cache
+        finally:
+            cache.close()
+
+    def test_enable_disk_reconfigures_path(self, tmp_path) -> None:
+        """Reconfiguring to a different path closes the old handle.
+
+        The new directory starts empty: ``k`` lived in the old directory
+        so the lookup at the new path misses.
+        """
+        cache = DiskCache(disk_path=tmp_path / "a")
+        try:
+            cache.put("k", "v", size_bytes=4)
+            cache.enable_disk(path=tmp_path / "b")
+            assert cache.disk_path == str(tmp_path / "b")
+            assert "k" not in cache
+        finally:
+            cache.close()
+
+    def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
+        """Re-enabling with identical settings doesn't churn the disk handle."""
+        cache = DiskCache(disk_path=tmp_path / "noop")
+        try:
+            handle_before = cache._disk
+            cache.enable_disk(
+                path=tmp_path / "noop", max_bytes=DiskCache.DEFAULT_DISK_MAX_BYTES
+            )
+            assert cache._disk is handle_before
+        finally:
+            cache.close()
+
+    def test_oversized_entry_skipped_and_preserves_neighbours(self, tmp_path) -> None:
+        """An entry larger than the cap is skipped without evicting peers.
+
+        Without this guard, ``diskcache``'s cull would evict every other
+        row trying to fit an unfittable entry, then drop the entry itself
+        — the wipe-everything failure mode the cache work originally fixed.
+
+        Cap is sized to leave plenty of room for diskcache's pickle
+        envelope around the small entries while still being small enough
+        that the declared oversized ``size_bytes`` (10 MB) trips the
+        guard. ``size_bytes`` is the caller's contract — the store
+        compares that, not the actual on-disk size.
+        """
+        cap = 1 * 1024 * 1024  # 1 MiB
+        cache = DiskCache(disk_path=tmp_path / "oversize", disk_max_bytes=cap)
+        try:
+            cache.put("small-1", "value", size_bytes=64)
+            cache.put("small-2", "value", size_bytes=64)
+            with _capture_disk_cache_warnings() as records:
+                cache.put("huge", "value", size_bytes=10 * 1024 * 1024)
+            assert "small-1" in cache
+            assert "small-2" in cache
+            assert "huge" not in cache
+            assert any("larger than the disk cache cap" in r.getMessage() for r in records)
+        finally:
+            cache.close()
+
+    def test_oversized_put_drops_prior_entry(self, tmp_path) -> None:
+        """An oversized re-insert must drop the prior value, not silently keep it."""
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "drop-prior", disk_max_bytes=cap)
+        try:
+            cache.put("k", "small", size_bytes=64)
+            assert "k" in cache
+            cache.put("k", "big", size_bytes=10 * 1024 * 1024)
+            assert "k" not in cache
+        finally:
+            cache.close()
+
+    def test_oversized_put_warns_once_per_key(self, tmp_path) -> None:
+        """Repeated oversized puts for the same key log once, not every call."""
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "dedup", disk_max_bytes=cap)
+        try:
+            with _capture_disk_cache_warnings() as records:
+                for _ in range(5):
+                    cache.put("k", "v", size_bytes=10 * 1024 * 1024)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 1
+        finally:
+            cache.close()
+
+    def test_oversized_warning_resets_after_normal_put(self, tmp_path) -> None:
+        """A successful normal-sized put clears the dedup bit for that key."""
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "reset-normal", disk_max_bytes=cap)
+        try:
+            with _capture_disk_cache_warnings() as records:
+                cache.put("k", "v", size_bytes=10 * 1024 * 1024)  # 1st warning
+                cache.put("k", "v", size_bytes=64)  # resets state
+                cache.put("k", "v", size_bytes=10 * 1024 * 1024)  # 2nd warning
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 2
+        finally:
+            cache.close()
+
+    def test_dedup_keys_on_full_namespaced_key(self, tmp_path) -> None:
+        """Dedup is per-key, so two adapters' colliding bare ids don't share state.
+
+        Pins the design choice that the oversize warning dedup tracks the
+        full namespaced key handed to ``put`` (e.g. ``channel:foo`` vs
+        ``calc:foo``) rather than collapsing on the bare id. Two different
+        prefixes for the same suffix each get their own one-shot warning.
+        """
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "two-prefixes", disk_max_bytes=cap)
+        try:
+            with _capture_disk_cache_warnings() as records:
+                cache.put("alpha:foo", "v", size_bytes=10 * 1024 * 1024)
+                cache.put("beta:foo", "v", size_bytes=10 * 1024 * 1024)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 2
+            messages = [r.getMessage() for r in warnings]
+            assert any("alpha:foo" in m for m in messages)
+            assert any("beta:foo" in m for m in messages)
+        finally:
+            cache.close()
+
+    def test_invalidate_resets_oversized_warning(self, tmp_path) -> None:
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "reset-inval", disk_max_bytes=cap)
+        try:
+            with _capture_disk_cache_warnings() as records:
+                cache.put("k", "v", size_bytes=10 * 1024 * 1024)
+                cache.invalidate("k")
+                cache.put("k", "v", size_bytes=10 * 1024 * 1024)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 2
+        finally:
+            cache.close()
+
+    def test_clear_resets_oversized_warning(self, tmp_path) -> None:
+        cap = 1 * 1024 * 1024
+        cache = DiskCache(disk_path=tmp_path / "reset-clear", disk_max_bytes=cap)
+        try:
+            with _capture_disk_cache_warnings() as records:
+                cache.put("a", "v", size_bytes=10 * 1024 * 1024)
+                cache.put("b", "v", size_bytes=10 * 1024 * 1024)
+                cache.clear()
+                cache.put("a", "v", size_bytes=10 * 1024 * 1024)
+                cache.put("b", "v", size_bytes=10 * 1024 * 1024)
+            warnings = [r for r in records if "larger than the disk cache cap" in r.getMessage()]
+            assert len(warnings) == 4
+        finally:
+            cache.close()
+
+
+class TestClearDisk:
+    """:meth:`DiskCache.clear_disk` removes a cache dir, refuses other dirs."""
+
+    def test_clear_removes_directory(self, tmp_path) -> None:
+        path = tmp_path / "victim"
+        cache = DiskCache(disk_path=path)
+        cache.put("k", "v", size_bytes=4)
+        cache.close()
+        assert path.exists()
+        DiskCache.clear_disk(path)
+        assert not path.exists()
+
+    def test_clear_missing_path_is_noop(self, tmp_path) -> None:
+        DiskCache.clear_disk(tmp_path / "never-existed")  # no raise
+
+    def test_clear_refuses_non_diskcache_directory(self, tmp_path) -> None:
+        """A typo'd path with unrelated contents must not be wiped."""
+        target = tmp_path / "user-stuff"
+        target.mkdir()
+        (target / "important.txt").write_text("don't delete me")
+        with pytest.raises(ValueError, match="does not look like a sift data cache"):
+            DiskCache.clear_disk(target)
+        assert (target / "important.txt").read_text() == "don't delete me"
+
+    def test_default_path_constant_under_tmp(self) -> None:
+        """Default lives under the OS tmp dir, not a user directory.
+
+        Reads the module-level snapshot rather than ``DEFAULT_DISK_PATH``
+        directly because the autouse fixture monkeypatches that attribute
+        for every test.
+        """
+        import tempfile
+
+        assert _PRODUCTION_DEFAULT_DISK_PATH.startswith(tempfile.gettempdir())
+        assert _PRODUCTION_DEFAULT_DISK_PATH.endswith("sift-data-cache")
diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 31aebf03a..5790a2f7a 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -11,28 +11,25 @@
 
 @pytest.fixture(autouse=True)
 def _isolate_default_disk_cache_path(monkeypatch, tmp_path):
-    """Redirect ``ChannelCache.DEFAULT_DISK_PATH`` to a per-test tmp dir.
-
-    The channel data disk cache is **opt-out** — any test that triggers the
-    lazy ``DataLowLevelClient`` init through ``ChannelsAPIAsync`` would
-    otherwise create the real ``/tmp/sift-channel-data-cache`` directory and
-    leak state across runs. Redirecting the default to ``tmp_path`` keeps
-    every test self-contained without each test having to know that the disk
-    tier is on by default.
-
-    The override deliberately preserves the ``sift-channel-data-cache``
-    suffix so ``TestChannelCacheClearDisk::test_default_path_constant_under_tmp``
-    keeps validating the real shape of the constant.
-
-    Importing ``ChannelCache`` here pulls in pandas, but only once per
-    session — fixture body still runs per-test, just the monkeypatch.
+    """Redirect ``DiskCache.DEFAULT_DISK_PATH`` to a per-test tmp dir.
+
+    On-disk caching is **opt-out** — any test that triggers the lazy
+    ``DiskCache`` init through ``SiftClient._get_disk_cache`` would
+    otherwise create the real ``/tmp/sift-data-cache`` directory and leak
+    state across runs. Redirecting the default to ``tmp_path`` keeps every
+    test self-contained without each test having to know the cache is on
+    by default.
+
+    The override preserves the ``sift-data-cache`` suffix so the redirected
+    path keeps the production shape; ``test_default_path_constant_under_tmp``
+    itself reads a module-level snapshot and is unaffected by this fixture.
     """
-    from sift_client._internal.low_level_wrappers.data import ChannelCache
+    from sift_client._internal.disk_cache import DiskCache
 
     monkeypatch.setattr(
-        ChannelCache,
+        DiskCache,
         "DEFAULT_DISK_PATH",
-        str(tmp_path / "sift-channel-data-cache"),
+        str(tmp_path / "sift-data-cache"),
     )
 
 
diff --git a/python/lib/sift_client/_tests/resources/test_channels.py b/python/lib/sift_client/_tests/resources/test_channels.py
index ceee9ddef..f337bd3f5 100644
--- a/python/lib/sift_client/_tests/resources/test_channels.py
+++ b/python/lib/sift_client/_tests/resources/test_channels.py
@@ -501,145 +501,3 @@ async def fake_update_channel(update):
 
         api._units_low_level_client.create_unit.assert_not_awaited()
         assert captured["update"].unit == ""
-
-
-class TestEnableDataCacheDisk:
-    """``enable_data_cache_disk`` / ``disable_data_cache_disk`` plumb the disk
-    cache settings to the underlying ``ChannelCache``, both pre- and post-init.
-
-    The cache itself is exercised directly in
-    ``test_data.py::TestChannelCache``; the tests here just verify the
-    resource-level wiring around it.
-    """
-
-    def test_enabled_by_default(self):
-        """Disk caching is opt-out: the default-constructed resource lands
-        at ``ChannelCache.DEFAULT_DISK_PATH`` on first ``get_data``.
-
-        The autouse ``_isolate_default_disk_cache_path`` fixture in
-        ``conftest.py`` redirects the constant to a per-test tmp dir so this
-        doesn't litter the real ``/tmp``.
-        """
-        from sift_client._internal.low_level_wrappers.data import ChannelCache
-
-        api = _make_api()
-        api._ensure_data_low_level_client()
-        cache = api._data_low_level_client.channel_cache
-        try:
-            assert cache.disk_enabled
-            assert cache.disk_path == ChannelCache.DEFAULT_DISK_PATH
-        finally:
-            cache.close()
-
-    def test_enable_before_lazy_init_propagates(self, tmp_path):
-        api = _make_api()
-        api.enable_data_cache_disk(path=str(tmp_path / "pre-init"), max_bytes=4096)
-        api._ensure_data_low_level_client()
-        cache = api._data_low_level_client.channel_cache
-        try:
-            assert cache.disk_enabled
-            assert cache.disk_path == str(tmp_path / "pre-init")
-            assert cache.disk_max_bytes == 4096
-        finally:
-            cache.close()
-
-    def test_enable_after_lazy_init_updates_live_cache(self, tmp_path):
-        """``disable_data_cache_disk`` → ``enable_data_cache_disk`` round-trip
-        on a live cache swaps the disk handle without recreating the wrapper.
-        """
-        api = _make_api()
-        # Start from the disk-off state so the test exercises the "off → on"
-        # transition rather than "default-on → reconfigured-on".
-        api.disable_data_cache_disk()
-        api._ensure_data_low_level_client()
-        cache = api._data_low_level_client.channel_cache
-        try:
-            assert not cache.disk_enabled
-            api.enable_data_cache_disk(path=str(tmp_path / "post-init"))
-            assert cache.disk_enabled
-            assert cache.disk_path == str(tmp_path / "post-init")
-        finally:
-            cache.close()
-
-    def test_enable_with_default_path_lands_on_default(self, monkeypatch, tmp_path):
-        """Calling ``enable_data_cache_disk()`` with no args uses the default path.
-
-        Redirects ``ChannelCache.DEFAULT_DISK_PATH`` to ``tmp_path`` so the
-        test doesn't create the real ``/tmp/sift-channel-data-cache``
-        directory.
-        """
-        from sift_client._internal.low_level_wrappers.data import ChannelCache
-
-        fake_default = str(tmp_path / "fake-default")
-        monkeypatch.setattr(ChannelCache, "DEFAULT_DISK_PATH", fake_default)
-
-        api = _make_api()
-        api.enable_data_cache_disk()
-        api._ensure_data_low_level_client()
-        cache = api._data_low_level_client.channel_cache
-        try:
-            assert cache.disk_path == fake_default
-        finally:
-            cache.close()
-
-    def test_disable_closes_live_disk_handle(self, tmp_path):
-        api = _make_api()
-        api.enable_data_cache_disk(path=str(tmp_path / "to-close"))
-        api._ensure_data_low_level_client()
-        cache = api._data_low_level_client.channel_cache
-        try:
-            assert cache.disk_enabled
-            api.disable_data_cache_disk()
-            assert not cache.disk_enabled
-            assert cache.disk_path is None
-        finally:
-            cache.close()
-
-    def test_clear_data_cache_on_disk_proxies_to_cache(self, tmp_path):
-        """The resource method removes the directory by proxying to ChannelCache."""
-        from sift_client._internal.low_level_wrappers.data import ChannelCache
-
-        path = tmp_path / "to-clear"
-        # Populate a real disk-cache directory so the marker check passes.
-        cache = ChannelCache(disk_path=path)
-        cache.close()
-        assert path.exists()
-
-        api = _make_api()
-        api.clear_data_cache_on_disk(path)
-        assert not path.exists()
-
-    def test_default_path_failure_falls_back_to_no_cache(self, monkeypatch, tmp_path):
-        """If the opt-out default cache path can't be opened, the wrapper
-        logs a warning and continues with caching disabled.
-
-        Simulated by pointing ``DEFAULT_DISK_PATH`` at a path that already
-        exists as a regular file — ``os.makedirs(..., exist_ok=True)`` raises
-        ``FileExistsError`` for non-directory targets.
-        """
-        from sift_client._internal.low_level_wrappers.data import ChannelCache
-
-        blocker = tmp_path / "not-a-dir"
-        blocker.write_text("i am a file, not a directory")
-        monkeypatch.setattr(ChannelCache, "DEFAULT_DISK_PATH", str(blocker))
-
-        api = _make_api()
-        api._ensure_data_low_level_client()  # must not raise
-        cache = api._data_low_level_client.channel_cache
-        try:
-            # Cache silently dropped; ``get_data`` will go straight to the wire.
-            assert not cache.disk_enabled
-        finally:
-            cache.close()
-
-    def test_explicit_path_failure_propagates(self, tmp_path):
-        """An explicit ``enable_data_cache_disk(path=...)`` that can't open
-        propagates the OSError — silent fallback would hide a user mistake.
-        """
-        blocker = tmp_path / "not-a-dir"
-        blocker.write_text("i am a file, not a directory")
-
-        api = _make_api()
-        api.enable_data_cache_disk(path=str(blocker))
-        with pytest.raises(FileExistsError):
-            api._ensure_data_low_level_client()
diff --git a/python/lib/sift_client/_tests/test_client_cache.py b/python/lib/sift_client/_tests/test_client_cache.py
new file mode 100644
index 000000000..bb7e85279
--- /dev/null
+++ b/python/lib/sift_client/_tests/test_client_cache.py
@@ -0,0 +1,261 @@
+"""Tests for :mod:`sift_client._internal.cache_namespace`.
+
+The namespace is the user-facing surface for the shared on-disk store
+that lives on the :class:`SiftClient`. Three concerns get pinned here:
+
+1. Default policy (opt-out: caching on at the default path) lands on
+   the live store on first use.
+2. Pre-init configuration (``client.cache.disable_disk()`` /
+   ``enable_disk(path=..., max_bytes=...)`` before any resource has
+   touched the cache) takes effect on the lazy build.
+3. Post-init reconfiguration mutates the live :class:`DiskCache` in
+   place rather than swapping it out — every resource adapter holds a
+   reference to the same store.
+
+The single-instance-shared-across-resources invariant is the architectural
+linchpin: a future second adapter must see the *same* handle as the channel
+adapter so a global byte budget and LRU still apply.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from sift_client._internal.disk_cache import DiskCache
+
+
+def _make_client():
+    """Build a SiftClient-like object with the bits the namespace needs.
+
+    Reaching into ``sift_client.SiftClient.__init__`` requires a live gRPC
+    config; the namespace only touches ``_disk_cache_config`` and
+    ``_disk_cache``, so a tiny stand-in keeps these tests independent of
+    transport setup.
+    """
+    from sift_client._internal.cache_namespace import CacheNamespace
+    from sift_client._internal.disk_cache_config import DiskCacheConfig
+
+    class _StandinClient:
+        def __init__(self) -> None:
+            self._disk_cache_config = DiskCacheConfig(enabled=True)
+            self._disk_cache: DiskCache | None = None
+            self.cache = CacheNamespace(self)  # type: ignore[arg-type]
+
+    return _StandinClient()
+
+
+# Delegate to the real lazy-init helper so the default-path-fallback tests
+# exercise the live ``SiftClient._get_disk_cache`` code rather than a copy.
+def _get_disk_cache(client) -> DiskCache:
+    """Return ``client``'s disk cache, building it via the production path.
+
+    ``SiftClient._get_disk_cache`` only touches ``self._disk_cache_config``
+    and ``self._disk_cache``, so it can be called unbound against the
+    stand-in from :func:`_make_client` without any transport setup.
+    Invoking the real method (instead of a hand-copied replica) keeps the
+    fallback tests from drifting away from what the client actually does:
+    a failed open at the default path yields a disabled store, while a
+    failed open at an explicit path re-raises.
+
+    The first call memoizes the store on ``client._disk_cache``; later
+    calls return that same instance.
+    """
+    from sift_client import SiftClient
+
+    return SiftClient._get_disk_cache(client)
+
+
+class TestCacheNamespaceDefaults:
+    """Opt-out default: the namespace is on, default path, fresh start."""
+
+    def test_enabled_by_default(self):
+        """First lazy access lands at ``DiskCache.DEFAULT_DISK_PATH``."""
+        client = _make_client()
+        store = _get_disk_cache(client)
+        try:
+            assert store.disk_enabled
+            assert store.disk_path == DiskCache.DEFAULT_DISK_PATH
+        finally:
+            store.close()
+
+    def test_one_store_shared_across_lazy_calls(self):
+        """Re-entering ``_get_disk_cache`` returns the same handle."""
+        client = _make_client()
+        first = _get_disk_cache(client)
+        second = _get_disk_cache(client)
+        try:
+            assert first is second
+        finally:
+            first.close()
+
+
+class TestEnableDisk:
+    """``client.cache.enable_disk`` configures the store, pre- and post-init."""
+
+    def test_pre_init_path_lands_on_store(self, tmp_path):
+        client = _make_client()
+        client.cache.enable_disk(path=str(tmp_path / "pre"), max_bytes=4096)
+        store = _get_disk_cache(client)
+        try:
+            assert store.disk_enabled
+            assert store.disk_path == str(tmp_path / "pre")
+            assert store.disk_max_bytes == 4096
+        finally:
+            store.close()
+
+    def test_post_init_swap_uses_same_store_instance(self, tmp_path):
+        """Reconfiguring after first use mutates in place rather than re-creating.
+
+        Every resource adapter holds a reference to ``client._disk_cache``;
+        if a reconfig replaced the handle, those adapters would still see
+        the stale one. ``DiskCache.enable_disk`` swaps the *contents* on
+        the same instance.
+        """
+        client = _make_client()
+        client.cache.disable_disk()  # start from off so this is a real on transition
+        store = _get_disk_cache(client)
+        try:
+            assert not store.disk_enabled
+            client.cache.enable_disk(path=str(tmp_path / "post"))
+            assert client._disk_cache is store  # same instance
+            assert store.disk_enabled
+            assert store.disk_path == str(tmp_path / "post")
+        finally:
+            store.close()
+
+    def test_enable_with_default_path_lands_on_default(self, monkeypatch, tmp_path):
+        """``enable_disk()`` with no args uses :attr:`DEFAULT_DISK_PATH`.
+
+        Redirects the constant so the test doesn't create the real
+        ``/tmp/sift-data-cache`` directory.
+        """
+        fake_default = str(tmp_path / "fake-default")
+        monkeypatch.setattr(DiskCache, "DEFAULT_DISK_PATH", fake_default)
+
+        client = _make_client()
+        client.cache.enable_disk()
+        store = _get_disk_cache(client)
+        try:
+            assert store.disk_path == fake_default
+        finally:
+            store.close()
+
+
+class TestDisableDisk:
+    """``client.cache.disable_disk`` turns the live cache off."""
+
+    def test_disable_closes_live_handle(self, tmp_path):
+        client = _make_client()
+        client.cache.enable_disk(path=str(tmp_path / "to-close"))
+        store = _get_disk_cache(client)
+        try:
+            assert store.disk_enabled
+            client.cache.disable_disk()
+            assert not store.disk_enabled
+            assert store.disk_path is None
+        finally:
+            store.close()
+
+    def test_disable_before_lazy_init_keeps_store_off(self):
+        """Calling disable before first use means the lazy build skips the open."""
+        client = _make_client()
+        client.cache.disable_disk()
+        store = _get_disk_cache(client)
+        try:
+            assert not store.disk_enabled
+        finally:
+            store.close()
+
+
+class TestClearDiskProxy:
+    """``client.cache.clear_disk`` proxies through to :meth:`DiskCache.clear_disk`."""
+
+    def test_clear_removes_directory(self, tmp_path):
+        path = tmp_path / "to-clear"
+        # Populate a real cache directory so the marker check passes.
+        cache = DiskCache(disk_path=path)
+        cache.close()
+        assert path.exists()
+
+        client = _make_client()
+        client.cache.clear_disk(path)
+        assert not path.exists()
+
+
+class TestLazyInitFallback:
+    """The default-path-failure fallback used by ``SiftClient._get_disk_cache``."""
+
+    def test_default_path_failure_falls_back_to_no_cache(self, monkeypatch, tmp_path):
+        """If the default cache path can't be opened, the lazy init produces
+        a disabled :class:`DiskCache` rather than raising.
+
+        Simulated by pointing ``DEFAULT_DISK_PATH`` at a path that already
+        exists as a regular file — ``os.makedirs(..., exist_ok=True)``
+        raises ``FileExistsError`` for non-directory targets.
+        """
+        blocker = tmp_path / "not-a-dir"
+        blocker.write_text("i am a file, not a directory")
+        monkeypatch.setattr(DiskCache, "DEFAULT_DISK_PATH", str(blocker))
+
+        client = _make_client()
+        store = _get_disk_cache(client)  # must not raise
+        try:
+            assert not store.disk_enabled
+        finally:
+            store.close()
+
+    def test_explicit_path_failure_propagates(self, tmp_path):
+        """An explicit path that can't be opened propagates the OSError.
+
+        Silent fallback would hide a user mistake.
+        """
+        blocker = tmp_path / "not-a-dir"
+        blocker.write_text("i am a file, not a directory")
+
+        client = _make_client()
+        client.cache.enable_disk(path=str(blocker))
+        with pytest.raises(FileExistsError):
+            _get_disk_cache(client)
+
+
+class TestSiftClientIntegration:
+    """End-to-end through the real :class:`SiftClient.__init__` entry point.
+
+    Asserts the wire-up: the namespace really lives at ``client.cache``,
+    the config is mutable through it, and the lazy ``_get_disk_cache``
+    returns the configured store.
+    """
+
+    def _make_real_client(self):
+        from sift_client import SiftClient, SiftConnectionConfig
+
+        return SiftClient(
+            connection_config=SiftConnectionConfig(
+                api_key="x",
+                grpc_url="disabled.invalid:0",
+                rest_url="https://disabled.invalid",
+                use_ssl=False,
+            )
+        )
+
+    def test_attribute_present_and_uses_real_lazy_init(self, monkeypatch, tmp_path):
+        fake_default = str(tmp_path / "real-client-default")
+        monkeypatch.setattr(DiskCache, "DEFAULT_DISK_PATH", fake_default)
+
+        client = self._make_real_client()
+        store = client._get_disk_cache()
+        try:
+            assert client.cache is not None
+            assert store.disk_enabled
+            assert store.disk_path == fake_default
+        finally:
+            store.close()
+
+    def test_disable_before_first_get_data_keeps_store_off(self):
+        client = self._make_real_client()
+        client.cache.disable_disk()
+        store = client._get_disk_cache()
+        try:
+            assert not store.disk_enabled
+        finally:
+            store.close()
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 5db5bf473..2cda463f1 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
 
+import logging
+from typing import TYPE_CHECKING
+
+from sift_client._internal.cache_namespace import CacheNamespace
+from sift_client._internal.disk_cache_config import DiskCacheConfig
 from sift_client._internal.urls import frontend_origin_for_api
 from sift_client.resources import (
     AssetsAPI,
@@ -45,6 +50,11 @@
 )
 from sift_client.util.util import AsyncAPIs
 
+if TYPE_CHECKING:
+    from sift_client._internal.disk_cache import DiskCache
+
+logger = logging.getLogger(__name__)
+
 
 class SiftClient(
     WithGrpcClient,
@@ -126,6 +136,9 @@ class SiftClient(
     data_import: DataImportAPI
     """Instance of the Data Import API for making synchronous requests."""
 
+    cache: CacheNamespace
+    """Surface for the shared on-disk cache used by every cache-aware resource."""
+
     async_: AsyncAPIs
     """Accessor for the asynchronous APIs. All asynchronous APIs are available as attributes on this accessor."""
 
@@ -180,6 +193,14 @@ def __init__(
         # pytest plugin's ``--sift-disabled`` mode.
         self._simulate: bool = False
 
+        # Shared on-disk cache: user intent in ``_disk_cache_config`` (opt-out
+        # default), live handle in ``_disk_cache`` (lazy so importing this
+        # module doesn't pay the diskcache cost up front). The
+        # ``client.cache`` namespace mutates both.
+        self._disk_cache_config = DiskCacheConfig(enabled=True)
+        self._disk_cache: DiskCache | None = None
+        self.cache = CacheNamespace(self)
+
         self.ping = PingAPI(self)
         self.assets = AssetsAPI(self)
         self.calculated_channels = CalculatedChannelsAPI(self)
@@ -231,6 +252,52 @@ def rest_client(self) -> RestClient:
         """The REST client used by the SiftClient for making REST API calls."""
         return self._rest_client
 
+    def _get_disk_cache(self) -> DiskCache:
+        """Lazy accessor for the shared on-disk cache. Internal to resources.
+
+        The cache is built on first use so that importing ``sift_client``
+        doesn't pay the ``diskcache``/``sqlite`` cost up front. The opt-out
+        default ("disk caching on at the temp-dir path") is applied here,
+        along with the silent-fallback-on-default-path failure: if the
+        user left :class:`DiskCacheConfig` at its defaults and opening
+        fails (read-only ``/tmp``, restricted container, ...), we log a
+        warning and return a disabled :class:`DiskCache` so resources can
+        still serve requests by going to the wire. An explicit user-
+        supplied path that can't be opened propagates so the caller knows
+        their request didn't take.
+
+        After the first call this just returns the memoized handle.
+        Subsequent ``client.cache.enable_disk(...)`` calls mutate the
+        existing handle in place; the build branch does not run again.
+        """
+        if self._disk_cache is None:
+            from sift_client._internal.disk_cache import DiskCache
+
+            config = self._disk_cache_config
+            if not config.enabled:
+                self._disk_cache = DiskCache()
+                return self._disk_cache
+            target_path = config.path or DiskCache.DEFAULT_DISK_PATH
+            try:
+                self._disk_cache = DiskCache(
+                    disk_path=target_path,
+                    disk_max_bytes=config.max_bytes,
+                )
+            except Exception:
+                if not config.using_default_path:
+                    raise
+                logger.warning(
+                    "Could not open the default sift data cache at %r; "
+                    "falling back to no caching. Call "
+                    "``client.cache.disable_disk()`` to silence this "
+                    "warning, or pass an explicit path via "
+                    "``client.cache.enable_disk(path=...)``.",
+                    target_path,
+                    exc_info=True,
+                )
+                self._disk_cache = DiskCache()
+        return self._disk_cache
+
     @property
     def app_url(self) -> str | None:
         """The Sift web-app origin for this client, or None if it can't be determined.
diff --git a/python/lib/sift_client/resources/channels.py b/python/lib/sift_client/resources/channels.py
index 6be88f84e..df5d218a9 100644
--- a/python/lib/sift_client/resources/channels.py
+++ b/python/lib/sift_client/resources/channels.py
@@ -1,9 +1,7 @@
 from __future__ import annotations
 
-import logging
 from typing import TYPE_CHECKING
 
-from sift_client._internal.disk_cache_config import DiskCacheConfig
 from sift_client._internal.low_level_wrappers.channels import ChannelsLowLevelClient
 from sift_client._internal.low_level_wrappers.units import UnitsLowLevelClient
 from sift_client.resources._base import ResourceBase
@@ -13,7 +11,6 @@
 from sift_client.util import cel_utils as cel
 
 if TYPE_CHECKING:
-    import os
     import re
     from datetime import datetime
 
@@ -22,8 +19,6 @@
 
     from sift_client.client import SiftClient
 
-logger = logging.getLogger(__name__)
-
 
 def _channel_ids_from_list(items: list[str | Channel]) -> list[str]:
     """Resolve a list of channel IDs or Channel objects to a list of channel IDs.
@@ -69,89 +64,6 @@ def __init__(self, sift_client: SiftClient):
         self._low_level_client = ChannelsLowLevelClient(grpc_client=self.client.grpc_client)
         self._units_low_level_client = UnitsLowLevelClient(grpc_client=self.client.grpc_client)
         self._data_low_level_client = None
-        self._disk_cache_config = DiskCacheConfig(enabled=True)
-
-    def enable_data_cache_disk(
-        self,
-        *,
-        path: str | os.PathLike[str] | None = None,
-        max_bytes: int | None = None,
-    ) -> None:
-        """Configure (or re-enable after ``disable_data_cache_disk``) the disk cache.
-
-        Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
-        use this method when you want to override the path or size, or to turn
-        the cache back on after a prior ``disable_data_cache_disk`` call.
-
-        Each entry that ``get_data`` returns is written to the cache and read
-        back on subsequent calls, even after process restart. The default
-        path lives under ``tempfile.gettempdir()`` and is shared across
-        sessions, so a re-run of the same workload picks up previously-cached
-        windows without a fetch.
-
-        Safe to call before or after the first ``get_data``. Reconfiguring
-        (different ``path`` or ``max_bytes``) closes the previous handle and
-        opens a new one.
-
-        An explicit ``path`` that can't be opened (e.g. permission denied,
-        read-only filesystem) raises so the caller knows the request didn't
-        take. The default-path open does *not* raise — see
-        ``_ensure_data_low_level_client`` for the silent fall-back behaviour.
-
-        Args:
-            path: Directory to persist the cache to. ``None`` (the default)
-                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
-                the path become available as cache hits.
-            max_bytes: Byte cap on disk usage. ``None`` uses
-                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
-                bound is reached, ``diskcache``'s LRU eviction takes over.
-
-        Example:
-            client.channels.enable_data_cache_disk(path="/data/sift-cache")
-            client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
-        """
-        self._disk_cache_config.enable(path=path, max_bytes=max_bytes)
-        if self._data_low_level_client is not None:
-            self._data_low_level_client.channel_cache.enable_disk(path=path, max_bytes=max_bytes)
-
-    def disable_data_cache_disk(self) -> None:
-        """Opt out of caching for ``get_data`` (no reads or writes).
-
-        Caching is on by default; call this when you don't want any cached
-        data written to or read from disk. Closes any open cache file
-        handle. The on-disk directory is NOT deleted — use
-        :meth:`clear_data_cache_on_disk` to wipe it.
-        """
-        self._disk_cache_config.disable()
-        if self._data_low_level_client is not None:
-            self._data_low_level_client.channel_cache.disable_disk()
-
-    def clear_data_cache_on_disk(self, path: str | os.PathLike[str] | None = None) -> None:
-        """Delete a previously-persisted on-disk channel data cache directory.
-
-        Drops stale caches from previous sessions, recovers from a corrupt
-        cache, or reclaims disk space. Removes the directory entirely; if disk
-        persistence is on, the next ``get_data`` re-opens an empty cache at
-        the same path.
-
-        This is a thin proxy around
-        :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
-        — exposed on the resource so callers don't need to reach into
-        ``_internal`` modules. The underlying classmethod is also reachable
-        directly (``ChannelCache.clear_disk(...)``) if the caller doesn't have
-        a ``SiftClient`` handy.
-
-        Args:
-            path: Directory of the cache to clear. ``None`` (the default)
-                targets ``ChannelCache.DEFAULT_DISK_PATH``.
-
-        Raises:
-            ValueError: If ``path`` exists but does not look like a sift
-                channel data cache directory.
-        """
-        from sift_client._internal.low_level_wrappers.data import ChannelCache
-
-        ChannelCache.clear_disk(path)
 
     async def get(
         self,
@@ -331,45 +243,19 @@ def _ensure_data_low_level_client(self):
         """Ensure that the data low level client is initialized. Separated out like this to not require large dependencies (pandas/pyarrow) for the client if not fetching data."""
         if self._data_low_level_client is None:
             from sift_client._internal.low_level_wrappers.data import (
-                ChannelCache,
+                ChannelDataCache,
                 DataLowLevelClient,
             )
 
-            kwargs: dict = {}
-            disk_config = self._disk_cache_config
-            if disk_config.enabled:
-                # ``disk_path=None`` means "no cache" to ChannelCache; substitute
-                # the default explicitly so the opt-out default still opens
-                # the cache. ``DEFAULT_DISK_PATH`` is read here (not at
-                # config construction) so test fixtures that monkeypatch the
-                # class attribute see the override.
-                kwargs["disk_cache_path"] = disk_config.path or ChannelCache.DEFAULT_DISK_PATH
-                if disk_config.max_bytes is not None:
-                    kwargs["disk_cache_max_bytes"] = disk_config.max_bytes
-            try:
-                self._data_low_level_client = DataLowLevelClient(
-                    grpc_client=self.client.grpc_client,
-                    **kwargs,
-                )
-            except Exception:
-                # Explicit user-supplied paths failures propagate so the
-                # caller knows their request didn't take. Default-path failures
-                # (read-only ``/tmp``, restricted containers, etc.) degrade
-                # silently to no-cache mode so ``get_data`` still works.
-                if not disk_config.using_default_path:
-                    raise
-                logger.warning(
-                    "Could not open the default channel data cache at %r; "
-                    "falling back to no caching for ``get_data``. Call "
-                    "``client.channels.disable_data_cache_disk()`` to silence "
-                    "this warning, or pass an explicit path via "
-                    "``enable_data_cache_disk(path=...)``.",
-                    kwargs.get("disk_cache_path"),
-                    exc_info=True,
-                )
-                self._data_low_level_client = DataLowLevelClient(
-                    grpc_client=self.client.grpc_client,
-                )
+            # The shared on-disk store lives on the client, which also handles a
+            # cache that fails to open; we just wrap the store in the channel-side
+            # adapter. Cache configuration (enable / disable / clear / path /
+            # max_bytes) is owned by ``client.cache`` — there's no resource-level knob.
+            store = self.client._get_disk_cache()
+            self._data_low_level_client = DataLowLevelClient(
+                grpc_client=self.client.grpc_client,
+                channel_cache=ChannelDataCache(store),
+            )
 
     async def get_data(
         self,
diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
index cc3ec914f..c37c3aed3 100644
--- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi
+++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi
@@ -5,7 +5,6 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    import os
     import re
     from datetime import datetime, timedelta
     from pathlib import Path
@@ -453,79 +452,6 @@ class ChannelsAPI:
         """
         ...
 
-    def clear_data_cache_on_disk(self, path: str | os.PathLike[str] | None = None) -> None:
-        """Delete a previously-persisted on-disk channel data cache directory.
-
-        Drops stale caches from previous sessions, recovers from a corrupt
-        cache, or reclaims disk space. Removes the directory entirely; if disk
-        persistence is on, the next ``get_data`` re-opens an empty cache at
-        the same path.
-
-        This is a thin proxy around
-        :meth:`ChannelCache.clear_disk <sift_client._internal.low_level_wrappers.data.ChannelCache.clear_disk>`
-        — exposed on the resource so callers don't need to reach into
-        ``_internal`` modules. The underlying classmethod is also reachable
-        directly (``ChannelCache.clear_disk(...)``) if the caller doesn't have
-        a ``SiftClient`` handy.
-
-        Args:
-            path: Directory of the cache to clear. ``None`` (the default)
-                targets ``ChannelCache.DEFAULT_DISK_PATH``.
-
-        Raises:
-            ValueError: If ``path`` exists but does not look like a sift
-                channel data cache directory.
-        """
-        ...
-
-    def disable_data_cache_disk(self) -> None:
-        """Opt out of caching for ``get_data`` (no reads or writes).
-
-        Caching is on by default; call this when you don't want any cached
-        data written to or read from disk. Closes any open cache file
-        handle. The on-disk directory is NOT deleted — use
-        :meth:`clear_data_cache_on_disk` to wipe it.
-        """
-        ...
-
-    def enable_data_cache_disk(
-        self, *, path: str | os.PathLike[str] | None = None, max_bytes: int | None = None
-    ) -> None:
-        """Configure (or re-enable after ``disable_data_cache_disk``) the disk cache.
-
-        Disk persistence is **on by default** at ``ChannelCache.DEFAULT_DISK_PATH``;
-        use this method when you want to override the path or size, or to turn
-        the cache back on after a prior ``disable_data_cache_disk`` call.
-
-        Each entry that ``get_data`` returns is written to the cache and read
-        back on subsequent calls, even after process restart. The default
-        path lives under ``tempfile.gettempdir()`` and is shared across
-        sessions, so a re-run of the same workload picks up previously-cached
-        windows without a fetch.
-
-        Safe to call before or after the first ``get_data``. Reconfiguring
-        (different ``path`` or ``max_bytes``) closes the previous handle and
-        opens a new one.
-
-        An explicit ``path`` that can't be opened (e.g. permission denied,
-        read-only filesystem) raises so the caller knows the request didn't
-        take. The default-path open does *not* raise — see
-        ``_ensure_data_low_level_client`` for the silent fall-back behaviour.
-
-        Args:
-            path: Directory to persist the cache to. ``None`` (the default)
-                uses ``ChannelCache.DEFAULT_DISK_PATH``. Existing entries at
-                the path become available as cache hits.
-            max_bytes: Byte cap on disk usage. ``None`` uses
-                ``ChannelCache.DEFAULT_DISK_MAX_BYTES`` (4 GiB). When the
-                bound is reached, ``diskcache``'s LRU eviction takes over.
-
-        Example:
-            client.channels.enable_data_cache_disk(path="/data/sift-cache")
-            client.channels.enable_data_cache_disk(max_bytes=1024 ** 3)  # 1 GiB
-        """
-        ...
-
     def find(self, **kwargs) -> Channel | None:
         """Find a single channel matching the given query. Takes the same arguments as `list`. If more than one channel is found,
         raises an error.

From 16b62a15564975288aea9faa04758ea186307e42 Mon Sep 17 00:00:00 2001
From: Ian Later <ian@siftstack.com>
Date: Thu, 25 Jun 2026 23:53:24 -0700
Subject: [PATCH 14/14] fmt

---
 python/lib/sift_client/_tests/_internal/test_disk_cache.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/lib/sift_client/_tests/_internal/test_disk_cache.py b/python/lib/sift_client/_tests/_internal/test_disk_cache.py
index 66e57a9e6..5711580a6 100644
--- a/python/lib/sift_client/_tests/_internal/test_disk_cache.py
+++ b/python/lib/sift_client/_tests/_internal/test_disk_cache.py
@@ -182,9 +182,7 @@ def test_enable_disk_noop_when_same_settings(self, tmp_path) -> None:
         cache = DiskCache(disk_path=tmp_path / "noop")
         try:
             handle_before = cache._disk
-            cache.enable_disk(
-                path=tmp_path / "noop", max_bytes=DiskCache.DEFAULT_DISK_MAX_BYTES
-            )
+            cache.enable_disk(path=tmp_path / "noop", max_bytes=DiskCache.DEFAULT_DISK_MAX_BYTES)
             assert cache._disk is handle_before
         finally:
             cache.close()