# Source code for el1xr_opt.Modules.oM_CsvToDuckDB
"""Convert a CSV case folder into a single ``<case>.duckdb`` file.

The DuckDB file holds the same tables as the CSV folder and can be fed to the
model in place of the folder. Use it from the command line::

    el1xr-csv2duckdb --dir data/EEM26 --case Home1

which writes ``data/EEM26/Home1.duckdb``. Reading it back through the model
produces the same results as reading the CSV folder.
"""
from __future__ import annotations
import argparse
import os
import pandas as pd
from .oM_InputCSVSource import CSVSource
from .oM_InputSchema import (
DB_DATA_PREFIX,
DB_DICT_PREFIX,
META_KEY_CASE,
META_TABLE,
idx_name,
)
def _write_table(con, name: str, df: pd.DataFrame) -> None:
    """Create table ``name`` on the DuckDB connection ``con`` from ``df``.

    The frame is handed to DuckDB's relational API, which receives the table
    name as an ordinary Python argument, so no table name is ever
    interpolated into a SQL string.
    """
    import duckdb

    relation = duckdb.from_df(df, connection=con)
    relation.create(name)
def _flatten_data(df: pd.DataFrame) -> pd.DataFrame:
    """Move every index level into an ordinary column named by ``idx_name``.

    The index is reset, and the first ``df.index.nlevels`` columns of the
    result are treated as the former index levels: the column at position
    ``k`` is renamed to ``idx_name(k)``. All other columns are left untouched.
    """
    flattened = df.reset_index()
    # One leading column per index level of the input frame.
    level_columns = flattened.columns[: df.index.nlevels]
    mapping = {label: idx_name(pos) for pos, label in enumerate(level_columns)}
    return flattened.rename(columns=mapping)
# [docs] -- documentation-link marker left over from the rendered source page; not code.
def _discard_files(*paths: str) -> None:
    """Delete each of ``paths`` that exists; missing files are ignored."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def csv_case_to_duckdb(dir_name: str, case_name: str, db_path: str | None = None,
                       overwrite: bool = True) -> str:
    """Write ``<dir_name>/<case_name>.duckdb`` from the CSV case folder. Returns the path.

    The database is assembled in a scratch file next to the destination and
    moved into place only after every table has been written. A conversion
    that fails part-way therefore neither destroys an existing database nor
    leaves a partially written one under the final name.

    Three groups of tables are written: the metadata table ``META_TABLE``
    holding one ``Key``/``Value`` row with the case name, one table per
    dictionary stem (prefixed with ``DB_DICT_PREFIX``), and one table per data
    stem (prefixed with ``DB_DATA_PREFIX``) with its index flattened into
    columns. Stems are processed in sorted order.

    Args:
        dir_name: Parent directory holding the case folder.
        case_name: Case name; the CSV folder is ``<dir_name>/<case_name>``.
        db_path: Output file. Defaults to ``<dir_name>/<case_name>.duckdb``.
        overwrite: When false, an existing ``db_path`` is an error instead of
            being replaced.

    Returns:
        The path of the written database file.

    Raises:
        FileExistsError: ``db_path`` already exists and ``overwrite`` is false.
    """
    import duckdb

    src = CSVSource(os.path.join(dir_name, case_name))
    if db_path is None:
        db_path = os.path.join(dir_name, f"{case_name}.duckdb")
    if os.path.exists(db_path) and not overwrite:
        raise FileExistsError(db_path)

    # Scratch database in the same directory as the target, so the final
    # os.replace() is a rename within one filesystem. DuckDB may leave a
    # "<file>.wal" write-ahead log beside a database that was not closed
    # cleanly, so that companion file is cleaned up together with the scratch.
    tmp_path = f"{db_path}.tmp"
    scratch_files = (tmp_path, f"{tmp_path}.wal")
    _discard_files(*scratch_files)  # leftovers from an earlier aborted run

    try:
        con = duckdb.connect(tmp_path)
        try:
            meta_df = pd.DataFrame({"Key": [META_KEY_CASE], "Value": [case_name]})
            _write_table(con, META_TABLE, meta_df)
            for stem in sorted(src.list_dict_stems()):
                _write_table(con, f"{DB_DICT_PREFIX}{stem}", src.read_dict(stem))
            for stem in sorted(src.list_data_stems()):
                _write_table(con, f"{DB_DATA_PREFIX}{stem}", _flatten_data(src.read_data(stem)))
        finally:
            con.close()
        # Only a fully written database ever replaces the previous one.
        os.replace(tmp_path, db_path)
    finally:
        # After a successful replace the scratch file is already gone and this
        # is a no-op; after a failure it removes the partial database.
        _discard_files(*scratch_files)
    return db_path
# [docs] -- documentation-link marker left over from the rendered source page; not code.
def main(argv=None) -> None:
    """Command-line entry point: convert one CSV case folder to a DuckDB file.

    Parses ``--dir``, ``--case`` and the optional ``--out`` from ``argv``
    (``None`` lets argparse fall back to the process arguments), runs the
    conversion and prints the path that was written.
    """
    cli = argparse.ArgumentParser(description="Convert a CSV case folder to a .duckdb file.")
    option_table = (
        ("--dir", {"required": True, "help": "Parent directory holding the case folder."}),
        ("--case", {"required": True, "help": "Case name (the folder inside --dir)."}),
        ("--out", {"default": None, "help": "Output .duckdb path (default: <dir>/<case>.duckdb)."}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    opts = cli.parse_args(argv)
    written = csv_case_to_duckdb(opts.dir, opts.case, db_path=opts.out)
    print(f"Wrote {written}")
# Run the converter when this module is executed directly as a script.
if __name__ == "__main__":
    main()