# Source code for cyto.utils.seg_cache

"""
cyto.utils.seg_cache
=====================
Pickle-based segmentation cache utilities.

Cache files are named ``segmentation_frame_{idx:04d}_{cell_type}.pkl``
and live in a persistent directory that is shared across runs and ignored
by git (``notebooks/**/cache/``).

Both the batch script and the interactive notebook import from here so the
cache format is defined in one place.
"""

import os
import pickle


def get_cache_filename(cache_dir, frame_idx, cell_type):
    """Build the canonical cache file path for one frame and cell type.

    Parameters
    ----------
    cache_dir : str or Path
        Directory containing cache files; converted with ``str()``.
    frame_idx : int
        Frame index (0-based), zero-padded to at least four digits.
    cell_type : str
        e.g. ``'cancer'`` or ``'tcell'``.

    Returns
    -------
    str
        ``<cache_dir>/segmentation_frame_<idx>_<cell_type>.pkl``
    """
    filename = f"segmentation_frame_{frame_idx:04d}_{cell_type}.pkl"
    directory = str(cache_dir)
    return os.path.join(directory, filename)


def cache_exists(cache_dir, frame_idx, cell_type):
    """Tell whether a usable (non-failed) cache entry is on disk for a frame.

    The file is fully unpickled to inspect its ``'failed'`` entry.

    Parameters
    ----------
    cache_dir : str or Path
    frame_idx : int
    cell_type : str

    Returns
    -------
    bool
        True only when the file can be opened and unpickled and its
        ``'failed'`` entry is missing or falsy. A missing, unreadable, or
        malformed file yields False.
    """
    cache_path = get_cache_filename(cache_dir, frame_idx, cell_type)
    try:
        # A missing file raises here and is reported as "no cache",
        # exactly like any other read or unpickle failure.
        with open(cache_path, 'rb') as fh:
            entry = pickle.load(fh)
        is_failed = entry.get('failed', False)
    except Exception:
        return False
    return not is_failed


def save_segmentation_cache(cache_dir, frame_idx, cell_type, label, features, failed=False):
    """Persist segmentation results to a pkl file.

    The cache directory is created if it does not exist. The payload is
    first written to a temporary sibling file and then moved over the final
    path with ``os.replace``, so an interrupted or failed dump never leaves a
    truncated pickle under the canonical cache name.

    Parameters
    ----------
    cache_dir : str or Path
    frame_idx : int
    cell_type : str
    label : ndarray
        Segmentation label array.
    features : DataFrame or list
        Sparse feature table (empty list for failed frames).
    failed : bool, optional
        If True, marks this entry as a failed segmentation (default False).

    Raises
    ------
    OSError
        If the directory or file cannot be created, written, or renamed.
    pickle.PicklingError
        If the payload cannot be pickled.
    """
    os.makedirs(str(cache_dir), exist_ok=True)
    cache_file = get_cache_filename(cache_dir, frame_idx, cell_type)
    cache_data = {
        'frame': frame_idx,
        'cell_type': cell_type,
        'label': label,
        'features': features,
        'failed': failed,
    }

    # The temp file lives in the same directory so the final rename stays on
    # one filesystem; the pid keeps separate processes from sharing a name.
    tmp_file = f"{cache_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_file, 'wb') as f:
            pickle.dump(cache_data, f)
        os.replace(tmp_file, cache_file)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see, so it is always re-raised.
        try:
            os.remove(tmp_file)
        except OSError:
            pass
        raise


def load_segmentation_cache(cache_dir, frame_idx, cell_type):
    """Read a cached segmentation entry back from its pkl file.

    Parameters
    ----------
    cache_dir : str or Path
    frame_idx : int
    cell_type : str

    Returns
    -------
    label : ndarray or None
    features : DataFrame, list, or None
    failed : bool or None
        Taken from the stored ``'failed'`` entry, or False when the entry
        has none. All three values are None when the cache file does not
        exist or could not be read; a read error is also printed.
    """
    nothing = (None, None, None)
    pkl_path = get_cache_filename(cache_dir, frame_idx, cell_type)

    if os.path.exists(pkl_path):
        try:
            with open(pkl_path, 'rb') as fh:
                payload = pickle.load(fh)
            # Read the flag before the required keys, as the stored entry
            # may predate the 'failed' field.
            failure_flag = payload.get('failed', False)
            return payload['label'], payload['features'], failure_flag
        except Exception as err:
            print(f"Error loading cache {pkl_path}: {err}")

    return nothing