Source code for cyto.utils.config
from pathlib import Path
from typing import Optional
[docs]
def load_db_config(configs_dir: Optional[Path] = None) -> dict:
    """
    Load the PostgreSQL database configuration for pyCyto.

    Reads ``db.def.toml`` (defaults) and recursively overlays
    ``db.user.toml`` (credentials / local overrides) found in the same
    directory, then returns the merged ``[database]`` table. Nested tables
    such as ``[database.admin]`` are merged key by key, so a user file that
    only sets ``database.admin.password`` keeps the remaining admin defaults.

    Args:
        configs_dir (Path, optional): Explicit path to the directory that
            contains ``db.def.toml``. When given, no search is performed.
            If ``None``, walks upward from ``Path.cwd()`` and uses the first
            directory where either ``configs/db.def.toml`` or ``db.def.toml``
            exists.

    Returns:
        dict: The merged ``database`` table. Its keys are whatever the two
        TOML files define (typically ``host``, ``port``, ``dbname``,
        ``user``, ``password``, and optionally an ``admin`` sub-dict).

    Raises:
        FileNotFoundError: If ``db.def.toml`` cannot be located, or if
            ``db.user.toml`` does not exist next to it (credentials are
            required at runtime).
        KeyError: If the merged configuration has no ``database`` table.

    Note:
        The user file is expected to supply at least ``database.password``;
        this function does not validate that.

    Example::

        from cyto.utils import load_db_config
        from urllib.parse import quote_plus

        db = load_db_config()
        conn_str = (
            f"postgresql+psycopg2://{quote_plus(db['user'])}:"
            f"{quote_plus(db['password'])}@{db['host']}:{db['port']}/{db['dbname']}"
        )
    """
    try:
        import tomllib  # Python 3.11+
    except ImportError:
        import tomli as tomllib  # Python 3.10: pip install tomli

    def _overlay(base: dict, override: dict) -> dict:
        """Recursively merge ``override`` into ``base`` in place."""
        for key, value in override.items():
            current = base.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                # Both sides are tables: descend instead of replacing, so
                # defaults not mentioned by the user survive.
                _overlay(current, value)
            else:
                base[key] = value
        return base

    if configs_dir is not None:
        root = Path(configs_dir)
    else:
        here = Path.cwd()
        for candidate in (here, *here.parents):
            # Prefer a ``configs/`` sub-directory, but also accept being
            # started from inside the configs directory itself.
            if (candidate / "configs" / "db.def.toml").exists():
                root = candidate / "configs"
                break
            if (candidate / "db.def.toml").exists():
                root = candidate
                break
        else:
            raise FileNotFoundError(
                "db.def.toml not found in cwd or any parent directory. "
                "Pass configs_dir explicitly, e.g.: "
                "load_db_config(Path('configs/'))"
            )

    with open(root / "db.def.toml", "rb") as f:
        cfg = tomllib.load(f)

    user_path = root / "db.user.toml"
    if not user_path.exists():
        raise FileNotFoundError(
            f"Database credentials not found: {user_path}\n"
            "Copy configs/db.user.toml.example to configs/db.user.toml "
            "and fill in your password."
        )
    with open(user_path, "rb") as f:
        user_cfg = tomllib.load(f)

    # Deep-merge: user values override defaults at every nesting level.
    _overlay(cfg, user_cfg)
    return cfg["database"]
[docs]
def load_notebook_config(notebooks_dir: Optional[Path] = None) -> dict:
    """
    Load the DataOps path configuration for notebooks.

    Reads ``config.def.toml`` and, if a ``config.user.toml`` exists in the
    same directory, recursively overlays it on top of the defaults. The
    overlay is merged key by key within each table, so a user file that
    overrides a single entry under ``[paths]`` keeps every other default in
    that section.

    Args:
        notebooks_dir (Path, optional): Explicit path to the directory
            containing ``config.def.toml``. When given, no search is
            performed. If ``None``, the function walks upward from
            ``Path.cwd()`` until the file is found.

    Returns:
        dict: Merged TOML config — keys match the sections in
        ``config.def.toml`` (e.g. ``paths``, ``dataset``, ``datasets``).
        Additionally contains a ``_meta`` table with two string entries:
        ``notebooks_dir`` (the directory the config was loaded from) and
        ``output_root`` (``paths.output_root`` if set, otherwise
        ``<parent of notebooks_dir>/output``). Neither path is resolved.

    Raises:
        FileNotFoundError: If ``config.def.toml`` cannot be located.

    DataOps retention tiers (documented in notebooks/config.def.toml):
        Tier 1 — Ceph SoT: raw snapshots + promoted final artifacts (permanent)
        Tier 2 — output_root: figures, tables, metadata JSON (retained until promoted)
        Tier 3 — scratch_root: large intermediate TIFFs/arrays (volatile, delete after promotion)
        Tier 4 — Ephemeral: SLURM logs, tmp; auto-rotated

    Example::

        from cyto.utils import load_notebook_config
        from pathlib import Path

        cfg = load_notebook_config()
        DATA_ROOT = Path(cfg["paths"]["data_root"])
        SCRATCH_ROOT = Path(cfg["paths"]["scratch_root"])
        OUTPUT_ROOT = Path(cfg["paths"]["output_root"])  # small results
        SNAPSHOT_ID = cfg["dataset"]["snapshot_id"]
    """
    try:
        import tomllib  # Python 3.11+
    except ImportError:
        import tomli as tomllib  # Python 3.10: pip install tomli

    def _overlay(base: dict, override: dict) -> dict:
        """Recursively merge ``override`` into ``base`` in place."""
        for key, value in override.items():
            current = base.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                # Both sides are tables: descend so untouched defaults in
                # the section are preserved.
                _overlay(current, value)
            else:
                base[key] = value
        return base

    if notebooks_dir is not None:
        root = Path(notebooks_dir)
    else:
        here = Path.cwd()
        for candidate in (here, *here.parents):
            if (candidate / "config.def.toml").exists():
                root = candidate
                break
        else:
            raise FileNotFoundError(
                "config.def.toml not found in cwd or any parent directory. "
                "Pass notebooks_dir explicitly, e.g.: "
                "load_notebook_config(Path('notebooks/'))"
            )

    with open(root / "config.def.toml", "rb") as f:
        cfg = tomllib.load(f)

    # The user file is optional; when present its values win over defaults.
    cfg_user = root / "config.user.toml"
    if cfg_user.exists():
        with open(cfg_user, "rb") as f:
            _overlay(cfg, tomllib.load(f))

    # Inject meta paths for convenience.
    output_root_str = cfg.get("paths", {}).get(
        "output_root",
        str(root.parent / "output"),  # fallback: <repo_root>/output/
    )
    meta = cfg.setdefault("_meta", {})
    meta["notebooks_dir"] = str(root)
    meta["output_root"] = str(Path(output_root_str))
    return cfg