# Source code for el1xr_opt.Modules.oM_InputSource

"""el1xr_opt input source — the ``InputSource`` interface and ``open_source`` factory.

The model reads its input through one of two backends:

  * ``CSVSource``    — a directory of ``oM_Dict_*`` / ``oM_Data_*`` CSV files
                       (the historical layout).
  * ``DuckDBSource`` — a single ``<case>.duckdb`` file holding the same tables.

``open_source(path)`` looks at the path and returns the right backend: a
directory gives a ``CSVSource``, a ``.duckdb`` file gives a ``DuckDBSource``.
Both backends return identical DataFrames, so the rest of the model does not
know or care which one was used.

The duckdb backend is imported lazily, so a checkout without ``duckdb``
installed can still build the CSV path.
"""
from __future__ import annotations

import abc
import os
from pathlib import Path

import pandas as pd



class InputSource(abc.ABC):
    """Abstract input source. Implementations: ``CSVSource``, ``DuckDBSource``.

    Subclasses must implement ``list_data_stems``, ``read_dict`` and
    ``read_data``. ``close`` is optional and does nothing by default.

    Instances are context managers: ``with`` yields the source itself and
    calls ``close()`` on exit, without suppressing exceptions.
    """

    # Annotation only: no value is assigned at class level.
    case_name: str

    @abc.abstractmethod
    def list_data_stems(self) -> set[str]:
        """Stems of the data tables present (no ``oM_Data_`` prefix, no ``_<case>.csv`` suffix)."""

    @abc.abstractmethod
    def read_dict(self, stem: str) -> pd.DataFrame:
        """Return the dimension dict for ``stem`` as a plain DataFrame (no index).

        Returns an empty DataFrame if the dict is absent.
        """

    @abc.abstractmethod
    def read_data(self, stem: str) -> pd.DataFrame:
        """Return a data table with its leading unnamed columns set as a nameless index.

        This is the exact shape ``oM_InputData`` expects: the same DataFrame the
        old ``pd.read_csv`` + ``set_index(unnamed columns)`` code produced.
        Raises ``FileNotFoundError`` if the stem is absent.
        """

    def close(self) -> None:
        """Release anything held by the source. The default does nothing."""

    def __enter__(self) -> "InputSource":
        """Enter the ``with`` block, yielding this source."""
        return self

    def __exit__(self, *exc: object) -> None:
        """Close the source on leaving the ``with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.close()




def open_source(path: str | os.PathLike) -> InputSource:
    """Open the input backend that matches ``path``.

    ``path`` is user-expanded (``~``) first. An existing directory yields a
    ``CSVSource``; an existing file whose suffix is exactly ``.duckdb`` yields
    a ``DuckDBSource``.

    Raises:
        ValueError: ``path`` is neither an existing directory nor an existing
            ``.duckdb`` file.
        ImportError: ``path`` is a ``.duckdb`` file but the DuckDB backend
            reports that ``duckdb`` is not available.
    """
    target = Path(path).expanduser()

    if target.is_dir():
        # Imported on demand rather than at module import time.
        from .oM_InputCSVSource import CSVSource
        return CSVSource(target)

    is_duckdb_file = target.is_file() and target.suffix == ".duckdb"
    if not is_duckdb_file:
        raise ValueError(f"{target}: not a CSV case directory or a .duckdb file")

    # The DuckDB backend module is only touched once a .duckdb file is requested.
    from .oM_InputDuckDBSource import DuckDBSource, _HAS_DUCKDB
    if _HAS_DUCKDB:
        return DuckDBSource(target)
    raise ImportError("duckdb is required to read a .duckdb input; install it with `pip install duckdb`")




def resolve_source(dir_name: str | os.PathLike, case_name: str) -> InputSource:
    """Choose and open the input for ``(dir_name, case_name)``.

    Two candidates are tried in order:

      1. the CSV case folder ``<dir_name>/<case_name>``, accepted only when it
         contains the case's ``Parameter`` data file (name from
         ``data_filename("Parameter", case_name)``);
      2. the DuckDB file ``<dir_name>/<case_name>.duckdb``.

    The test is for the Parameter file rather than the folder alone, so a
    same-named folder without it (e.g. an empty results folder) does not hide
    a ``.duckdb`` input.

    Raises:
        FileNotFoundError: neither candidate exists.
    """
    from .oM_InputSchema import data_filename

    csv_folder = os.path.join(dir_name, case_name)
    duckdb_file = os.path.join(dir_name, f"{case_name}.duckdb")
    parameter_csv = os.path.join(csv_folder, data_filename("Parameter", case_name))

    # CSV wins whenever the case folder holds real data.
    if os.path.isfile(parameter_csv):
        return open_source(csv_folder)

    if not os.path.isfile(duckdb_file):
        raise FileNotFoundError(
            f"no CSV case folder with data at '{csv_folder}' and no DuckDB file '{duckdb_file}'"
        )
    return open_source(duckdb_file)




def finalize_data_index(df: pd.DataFrame, idx_cols: list) -> pd.DataFrame:
    """Turn ``idx_cols`` into a nameless index (the model expects a nameless index).

    When ``idx_cols`` is empty, ``df`` is returned as-is. Otherwise the result
    of ``df.set_index(idx_cols)`` is returned with every index level name set
    to ``None``.
    """
    if not idx_cols:
        return df

    indexed = df.set_index(idx_cols)
    # One None per level clears the names for single and multi-level indexes alike.
    indexed.index.names = [None] * len(idx_cols)
    return indexed




def df_to_set_values(df: pd.DataFrame) -> list:
    """Flatten a dict DataFrame into values for a Pyomo ``Set(initialize=...)``.

    The shape of the result depends on the number of columns:

      * no columns   -> ``[]``
      * one column   -> ``[v1, v2, ...]``
      * two+ columns -> ``[(a, b, ...), ...]`` (relation / membership)
    """
    n_cols = df.shape[1]
    if n_cols == 0:
        return []
    if n_cols == 1:
        only_column = df.iloc[:, 0]
        return only_column.tolist()
    # With name=None, itertuples already yields plain tuples, one per row.
    return list(df.itertuples(index=False, name=None))