Source code for cyto.utils.config

from pathlib import Path
from typing import Optional


def load_db_config(configs_dir: Optional[Path] = None) -> dict:
    """
    Load the PostgreSQL database configuration for pyCyto.

    Reads ``db.def.toml`` (defaults) and recursively overlays the required
    ``db.user.toml`` (credentials / local overrides) on top of it. The user
    file is expected to supply at minimum ``database.password``; this
    function does not validate that the key is present.

    Args:
        configs_dir (Path, optional): Explicit path to the ``configs/``
            directory. If ``None``, walks upward from ``Path.cwd()`` and, for
            each directory, accepts either ``<dir>/configs/db.def.toml`` or
            ``<dir>/db.def.toml`` (checked in that order).

    Returns:
        dict: The merged ``database`` table, with keys such as ``host``,
        ``port``, ``dbname``, ``user``, ``password``, and optionally an
        ``admin`` sub-dict. Nested tables are merged key by key, so a user
        file that overrides a single ``admin`` key keeps the other defaults.

    Raises:
        FileNotFoundError: If ``db.def.toml`` cannot be located, or if
            ``db.user.toml`` does not exist (credentials are required at
            runtime).
        KeyError: If the merged configuration has no ``database`` table.

    Example::

        from cyto.utils import load_db_config
        from urllib.parse import quote_plus

        db = load_db_config()
        conn_str = (
            f"postgresql+psycopg2://{quote_plus(db['user'])}:"
            f"{quote_plus(db['password'])}@{db['host']}:{db['port']}/{db['dbname']}"
        )
    """
    try:
        import tomllib  # Python 3.11+
    except ImportError:
        import tomli as tomllib  # Python 3.10: pip install tomli

    def _overlay(base: dict, override: dict) -> dict:
        """Recursively merge ``override`` into ``base`` in place."""
        for key, value in override.items():
            current = base.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                # Both sides are tables: descend so sibling defaults survive.
                _overlay(current, value)
            else:
                # Scalars, lists, new keys, or a type mismatch: user wins.
                base[key] = value
        return base

    if configs_dir is not None:
        root = Path(configs_dir)
    else:
        here = Path.cwd()
        for candidate in [here, *here.parents]:
            if (candidate / "configs" / "db.def.toml").exists():
                root = candidate / "configs"
                break
            if (candidate / "db.def.toml").exists():
                root = candidate
                break
        else:
            raise FileNotFoundError(
                "db.def.toml not found in cwd or any parent directory. "
                "Pass configs_dir explicitly, e.g.: "
                "load_db_config(Path('configs/'))"
            )

    with open(root / "db.def.toml", "rb") as f:
        cfg = tomllib.load(f)

    # Credentials are never shipped in the defaults file, so a missing user
    # file is a hard error rather than a silent fallback.
    user_path = root / "db.user.toml"
    if not user_path.exists():
        raise FileNotFoundError(
            f"Database credentials not found: {user_path}\n"
            "Copy configs/db.user.toml.example to configs/db.user.toml "
            "and fill in your password."
        )
    with open(user_path, "rb") as f:
        user_cfg = tomllib.load(f)

    # Deep-merge: user values override defaults at every nesting level.
    _overlay(cfg, user_cfg)

    return cfg["database"]
def load_notebook_config(notebooks_dir: Optional[Path] = None) -> dict:
    """
    Load the DataOps path configuration for notebooks.

    Reads ``config.def.toml`` (defaults) and, if ``config.user.toml`` exists
    in the same directory, recursively overlays it. Tables are merged key by
    key, so a user file that overrides a single entry under ``[paths]`` keeps
    the remaining defaults of that section.

    Args:
        notebooks_dir (Path, optional): Explicit path to the ``notebooks/``
            directory containing ``config.def.toml``. If ``None``, the
            function walks upward from ``Path.cwd()`` until the file is found.

    Returns:
        dict: Merged TOML config — keys match the sections in
        ``config.def.toml`` (``paths``, ``dataset``, ``datasets``). Also
        injects ``_meta.notebooks_dir`` and ``_meta.output_root`` for
        convenience; both are stored as strings. ``_meta.output_root`` is
        ``paths.output_root`` when configured, otherwise ``output`` next to
        the notebooks directory.

    Raises:
        FileNotFoundError: If ``config.def.toml`` cannot be located.

    DataOps retention tiers (documented in notebooks/config.def.toml):
        Tier 1 — Ceph SoT: raw snapshots + promoted final artifacts (permanent)
        Tier 2 — output_root: figures, tables, metadata JSON (retained until promoted)
        Tier 3 — scratch_root: large intermediate TIFFs/arrays (volatile, delete after promotion)
        Tier 4 — Ephemeral: SLURM logs, tmp; auto-rotated

    Example::

        from cyto.utils import load_notebook_config
        from pathlib import Path

        cfg = load_notebook_config()
        DATA_ROOT = Path(cfg["paths"]["data_root"])
        SCRATCH_ROOT = Path(cfg["paths"]["scratch_root"])
        OUTPUT_ROOT = Path(cfg["paths"]["output_root"])  # small results
        SNAPSHOT_ID = cfg["dataset"]["snapshot_id"]
    """
    try:
        import tomllib  # Python 3.11+
    except ImportError:
        import tomli as tomllib  # Python 3.10: pip install tomli

    def _overlay(base: dict, override: dict) -> dict:
        """Recursively merge ``override`` into ``base`` in place."""
        for key, value in override.items():
            current = base.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                # Both sides are tables: descend so sibling defaults survive.
                _overlay(current, value)
            else:
                # Scalars, lists, new keys, or a type mismatch: user wins.
                base[key] = value
        return base

    if notebooks_dir is not None:
        root = Path(notebooks_dir)
    else:
        here = Path.cwd()
        for candidate in [here, *here.parents]:
            if (candidate / "config.def.toml").exists():
                root = candidate
                break
        else:
            raise FileNotFoundError(
                "config.def.toml not found in cwd or any parent directory. "
                "Pass notebooks_dir explicitly, e.g.: "
                "load_notebook_config(Path('notebooks/'))"
            )

    with open(root / "config.def.toml", "rb") as f:
        cfg = tomllib.load(f)

    # The user file is optional here; when present it is merged recursively
    # so partial overrides do not wipe out the rest of a section.
    cfg_user = root / "config.user.toml"
    if cfg_user.exists():
        with open(cfg_user, "rb") as f:
            _overlay(cfg, tomllib.load(f))

    # Inject meta paths for convenience (kept as plain strings).
    output_root_str = cfg.get("paths", {}).get(
        "output_root",
        str(root.parent / "output"),  # fallback: <repo_root>/output/
    )
    cfg.setdefault("_meta", {})
    cfg["_meta"]["notebooks_dir"] = str(root)
    cfg["_meta"]["output_root"] = str(Path(output_root_str))

    return cfg