428 lines
17 KiB
Python
428 lines
17 KiB
Python
"""
|
|
FP&A API Loader
|
|
Reads generated CSV files and loads them into the Go FP&A API.
|
|
|
|
Real endpoint map (from main.go):
|
|
POST /api/v1/budgets ← one budget record per request
|
|
PUT /api/v1/budgets/{id} ← update (not used in seeding)
|
|
DELETE /api/v1/budgets/{id} ← delete (not used in seeding)
|
|
POST /api/v1/actuals/ingest ← bulk actuals ingest
|
|
GET /api/v1/variance ← read-only, not loaded
|
|
GET /api/v1/variance/alerts ← read-only, not loaded
|
|
GET /api/v1/health ← health check
|
|
|
|
Load order matters:
|
|
1. Budgets first — actuals reference budget lines by category+period
|
|
2. Actuals second — ingested in bulk against existing budgets
|
|
"""
|
|
|
|
import csv
|
|
import json
|
|
import time
|
|
import os
|
|
import requests
|
|
from typing import List, Dict, Any, Optional, Callable
|
|
from dataclasses import dataclass
|
|
|
|
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data", "csv")
|
|
|
|
# ── Config ────────────────────────────────────────────────────────────────────
|
|
@dataclass
|
|
class LoaderConfig:
|
|
base_url: str = "http://localhost:8080"
|
|
batch_size: int = 50 # used for actuals ingest
|
|
delay_between_batches: float = 0.05
|
|
dry_run: bool = False
|
|
auth_token: Optional[str] = None
|
|
|
|
DEFAULT_CONFIG = LoaderConfig()
|
|
|
|
# ── CSV reading helpers ───────────────────────────────────────────────────────
|
|
INT_FIELDS = {"year", "month"}
|
|
FLOAT_FIELDS = {
|
|
"budget_amount", "actual_amount", "variance", "variance_pct",
|
|
"product_revenue", "service_revenue", "total_revenue",
|
|
"cogs_product", "cogs_service", "total_cogs",
|
|
"gross_profit", "gross_margin_pct", "total_opex",
|
|
"ebitda", "ebitda_margin_pct", "net_income",
|
|
"cash_collected_product", "cash_collected_service",
|
|
"cash_paid_opex", "cash_paid_cogs", "net_operating_cash_flow",
|
|
"capex", "net_investing_cash_flow", "loan_repayment",
|
|
"equity_raised", "net_financing_cash_flow",
|
|
"net_change_in_cash", "closing_cash_balance",
|
|
"annual_salary_budget", "actual_salary_paid_ytd", "headcount_fte",
|
|
}
|
|
|
|
def _coerce(row: Dict[str, str]) -> Dict[str, Any]:
|
|
out = {}
|
|
for k, v in row.items():
|
|
if k in INT_FIELDS:
|
|
out[k] = int(v) if v else None
|
|
elif k in FLOAT_FIELDS:
|
|
out[k] = float(v) if v else None
|
|
else:
|
|
out[k] = v
|
|
return out
|
|
|
|
def read_csv(filename: str) -> List[Dict[str, Any]]:
|
|
path = os.path.join(DATA_DIR, filename)
|
|
if not os.path.exists(path):
|
|
raise FileNotFoundError(f"CSV not found: {path} (run generators/generate_data.py first)")
|
|
with open(path, newline="") as f:
|
|
return [_coerce(row) for row in csv.DictReader(f)]
|
|
|
|
# ── Payload mappers ───────────────────────────────────────────────────────────
|
|
# Each mapper takes one CSV row and returns the JSON body your Go handler expects.
|
|
# Adjust field names here if your Go structs use different names.
|
|
|
|
def revenue_row_to_budget(row: Dict) -> Dict:
|
|
"""revenue_budget_vs_actuals.csv → POST /api/v1/budgets"""
|
|
return {
|
|
"category": row["revenue_type"], # "Product" | "Service"
|
|
"department": "Revenue",
|
|
"period": row["period"], # "2023-01"
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["budget_amount"],
|
|
"currency": "USD",
|
|
"notes": f"{row['revenue_type']} revenue budget",
|
|
}
|
|
|
|
def revenue_row_to_actual(row: Dict) -> Dict:
|
|
"""revenue_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
|
|
return {
|
|
"category": row["revenue_type"],
|
|
"department": "Revenue",
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["actual_amount"],
|
|
"currency": "USD",
|
|
"source": "csv_import",
|
|
}
|
|
|
|
def opex_row_to_budget(row: Dict) -> Dict:
|
|
"""opex_budget_vs_actuals.csv → POST /api/v1/budgets"""
|
|
return {
|
|
"category": row["category"], # "Salaries", "Cloud Infrastructure", …
|
|
"department": row["department"],
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["budget_amount"],
|
|
"currency": "USD",
|
|
"notes": f"{row['department']} opex budget",
|
|
}
|
|
|
|
def opex_row_to_actual(row: Dict) -> Dict:
|
|
"""opex_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
|
|
return {
|
|
"category": row["category"],
|
|
"department": row["department"],
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["actual_amount"],
|
|
"currency": "USD",
|
|
"source": "csv_import",
|
|
}
|
|
|
|
def pl_row_to_actual(row: Dict) -> Dict:
|
|
"""
|
|
pl_income_statement.csv → POST /api/v1/actuals/ingest
|
|
The P&L is a derived/summary view; we ingest key line items as actuals.
|
|
Budgets for these are already covered by revenue + opex CSVs.
|
|
"""
|
|
return {
|
|
"category": "P&L Summary",
|
|
"department": "Finance",
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["net_income"],
|
|
"currency": "USD",
|
|
"source": "csv_import",
|
|
"metadata": {
|
|
"total_revenue": row["total_revenue"],
|
|
"gross_profit": row["gross_profit"],
|
|
"gross_margin_pct": row["gross_margin_pct"],
|
|
"ebitda": row["ebitda"],
|
|
"ebitda_margin_pct": row["ebitda_margin_pct"],
|
|
},
|
|
}
|
|
|
|
def cashflow_row_to_actual(row: Dict) -> Dict:
|
|
"""cash_flow.csv → POST /api/v1/actuals/ingest"""
|
|
return {
|
|
"category": "Cash Flow",
|
|
"department": "Finance",
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["net_change_in_cash"],
|
|
"currency": "USD",
|
|
"source": "csv_import",
|
|
"metadata": {
|
|
"net_operating_cash_flow": row["net_operating_cash_flow"],
|
|
"net_investing_cash_flow": row["net_investing_cash_flow"],
|
|
"net_financing_cash_flow": row["net_financing_cash_flow"],
|
|
"closing_cash_balance": row["closing_cash_balance"],
|
|
"equity_raised": row["equity_raised"],
|
|
},
|
|
}
|
|
|
|
def headcount_row_to_actual(row: Dict) -> Dict:
|
|
"""headcount_workforce.csv → POST /api/v1/actuals/ingest (active employees only)"""
|
|
return {
|
|
"category": "Headcount",
|
|
"department": row["department"],
|
|
"period": row["period"],
|
|
"year": row["year"],
|
|
"month": row["month"],
|
|
"amount": row["actual_salary_paid_ytd"],
|
|
"currency": "USD",
|
|
"source": "csv_import",
|
|
"metadata": {
|
|
"employee_id": row["employee_id"],
|
|
"role": row["role"],
|
|
"status": row["status"],
|
|
"fte": row["headcount_fte"],
|
|
"annual_salary": row["annual_salary_budget"],
|
|
},
|
|
}
|
|
|
|
# ── HTTP helpers ──────────────────────────────────────────────────────────────
|
|
def _headers(config: LoaderConfig) -> Dict:
|
|
h = {"Content-Type": "application/json"}
|
|
if config.auth_token:
|
|
h["Authorization"] = f"Bearer {config.auth_token}"
|
|
return h
|
|
|
|
def post_one(url: str, payload: Dict, config: LoaderConfig) -> Dict:
|
|
"""Single POST — used for /api/v1/budgets (one record per call)."""
|
|
if config.dry_run:
|
|
print(f" [DRY RUN] POST {url}")
|
|
print(f" {json.dumps(payload, indent=6)}")
|
|
return {"status": "dry_run"}
|
|
try:
|
|
resp = requests.post(url, json=payload, headers=_headers(config), timeout=10)
|
|
resp.raise_for_status()
|
|
return resp.json() if resp.content else {"status": "ok"}
|
|
except requests.exceptions.ConnectionError:
|
|
return {"error": "connection_refused"}
|
|
except requests.exceptions.HTTPError as e:
|
|
return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
|
|
except Exception as e:
|
|
return {"error": str(e)}
|
|
|
|
def post_batch(url: str, records: List[Dict], config: LoaderConfig) -> Dict:
|
|
"""Batch POST — used for /api/v1/actuals/ingest."""
|
|
if config.dry_run:
|
|
print(f" [DRY RUN] POST {url} ({len(records)} records)")
|
|
print(f" Sample: {json.dumps(records[0], indent=6)}")
|
|
return {"status": "dry_run", "count": len(records)}
|
|
try:
|
|
resp = requests.post(url, json={"records": records},
|
|
headers=_headers(config), timeout=30)
|
|
resp.raise_for_status()
|
|
return resp.json() if resp.content else {"status": "ok"}
|
|
except requests.exceptions.ConnectionError:
|
|
return {"error": "connection_refused"}
|
|
except requests.exceptions.HTTPError as e:
|
|
return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
|
|
except Exception as e:
|
|
return {"error": str(e)}
|
|
|
|
# ── Budget loader: POST /api/v1/budgets ───────────────────────────────────────
|
|
def load_budgets(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
|
|
"""
|
|
Loads budget rows from revenue + opex CSVs.
|
|
Each row is a separate POST (budget records are individual, not batched).
|
|
Deduplicated by (category, department, period) to avoid double-posting.
|
|
"""
|
|
url = config.base_url.rstrip("/") + "/api/v1/budgets"
|
|
budget_rows = []
|
|
|
|
for filename, mapper in [
|
|
("revenue_budget_vs_actuals.csv", revenue_row_to_budget),
|
|
("opex_budget_vs_actuals.csv", opex_row_to_budget),
|
|
]:
|
|
rows = read_csv(filename)
|
|
budget_rows.extend(mapper(r) for r in rows)
|
|
|
|
# Deduplicate: last write wins for same category+dept+period
|
|
seen = {}
|
|
for row in budget_rows:
|
|
key = (row["category"], row["department"], row["period"])
|
|
seen[key] = row
|
|
unique = list(seen.values())
|
|
|
|
print(f"\n📋 Loading budgets → {url}")
|
|
print(f" {len(unique)} unique budget lines")
|
|
|
|
results = {"total": len(unique), "ok": 0, "errors": []}
|
|
for i, payload in enumerate(unique, 1):
|
|
result = post_one(url, payload, config)
|
|
if "error" in result:
|
|
results["errors"].append({**result, "payload": payload})
|
|
if len(results["errors"]) <= 3: # don't flood the console
|
|
print(f" ⚠ [{i}/{len(unique)}] {result['error']}")
|
|
else:
|
|
results["ok"] += 1
|
|
if i % 50 == 0 or i == len(unique):
|
|
print(f" ✓ {i}/{len(unique)} budgets posted")
|
|
time.sleep(0.01) # light throttle for individual POSTs
|
|
|
|
return results
|
|
|
|
# ── Actuals loader: POST /api/v1/actuals/ingest ───────────────────────────────
|
|
def load_actuals(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
|
|
"""
|
|
Collects actuals from all CSV sources and bulk-ingests them.
|
|
Filters headcount to Active employees only to avoid salary double-counting.
|
|
"""
|
|
url = config.base_url.rstrip("/") + "/api/v1/actuals/ingest"
|
|
all_actuals = []
|
|
|
|
# Revenue actuals
|
|
for row in read_csv("revenue_budget_vs_actuals.csv"):
|
|
all_actuals.append(revenue_row_to_actual(row))
|
|
|
|
# Opex actuals
|
|
for row in read_csv("opex_budget_vs_actuals.csv"):
|
|
all_actuals.append(opex_row_to_actual(row))
|
|
|
|
# P&L summary actuals
|
|
for row in read_csv("pl_income_statement.csv"):
|
|
all_actuals.append(pl_row_to_actual(row))
|
|
|
|
# Cash flow actuals
|
|
for row in read_csv("cash_flow.csv"):
|
|
all_actuals.append(cashflow_row_to_actual(row))
|
|
|
|
# Headcount actuals — active employees only, one record per employee per month
|
|
headcount_seen = set()
|
|
for row in read_csv("headcount_workforce.csv"):
|
|
if row["status"] != "Active":
|
|
continue
|
|
key = (row["employee_id"], row["period"])
|
|
if key in headcount_seen:
|
|
continue
|
|
headcount_seen.add(key)
|
|
all_actuals.append(headcount_row_to_actual(row))
|
|
|
|
print(f"\n📥 Loading actuals → {url}")
|
|
print(f" {len(all_actuals)} records | batch size {config.batch_size}")
|
|
|
|
results = {"total": len(all_actuals), "batches": 0, "errors": []}
|
|
for i in range(0, len(all_actuals), config.batch_size):
|
|
batch = all_actuals[i : i + config.batch_size]
|
|
result = post_batch(url, batch, config)
|
|
results["batches"] += 1
|
|
if "error" in result:
|
|
results["errors"].append(result)
|
|
print(f" ⚠ batch {results['batches']}: {result['error']}")
|
|
else:
|
|
print(f" ✓ batch {results['batches']} ({len(batch)} records)")
|
|
time.sleep(config.delay_between_batches)
|
|
|
|
return results
|
|
|
|
# ── Variance check: GET /api/v1/variance ──────────────────────────────────────
|
|
def check_variance(config: LoaderConfig = DEFAULT_CONFIG, period: Optional[str] = None):
|
|
"""
|
|
Calls the variance report after loading to verify data landed correctly.
|
|
"""
|
|
url = config.base_url.rstrip("/") + "/api/v1/variance"
|
|
params = {"period": period} if period else {}
|
|
print(f"\n📊 Fetching variance report → {url}")
|
|
if config.dry_run:
|
|
print(" [DRY RUN] skipped")
|
|
return {}
|
|
try:
|
|
resp = requests.get(url, params=params, timeout=10)
|
|
resp.raise_for_status()
|
|
data = resp.json()
|
|
print(f" ✓ {len(data) if isinstance(data, list) else 1} variance entries returned")
|
|
return data
|
|
except Exception as e:
|
|
print(f" ⚠ {e}")
|
|
return {"error": str(e)}
|
|
|
|
def check_alerts(config: LoaderConfig = DEFAULT_CONFIG):
|
|
"""Calls /api/v1/variance/alerts — shows any over/under budget flags."""
|
|
url = config.base_url.rstrip("/") + "/api/v1/variance/alerts"
|
|
print(f"\n🚨 Fetching variance alerts → {url}")
|
|
if config.dry_run:
|
|
print(" [DRY RUN] skipped")
|
|
return {}
|
|
try:
|
|
resp = requests.get(url, timeout=10)
|
|
resp.raise_for_status()
|
|
data = resp.json()
|
|
count = len(data) if isinstance(data, list) else "?"
|
|
print(f" ✓ {count} alerts")
|
|
return data
|
|
except Exception as e:
|
|
print(f" ⚠ {e}")
|
|
return {"error": str(e)}
|
|
|
|
# ── Full seed run ─────────────────────────────────────────────────────────────
|
|
def seed_all(config: LoaderConfig = DEFAULT_CONFIG):
|
|
"""
|
|
Full seed sequence:
|
|
1. Load budgets (POST /api/v1/budgets, one at a time)
|
|
2. Load actuals (POST /api/v1/actuals/ingest, batched)
|
|
3. Spot-check variance report
|
|
"""
|
|
print(f"\n🚀 FP&A Seed → {config.base_url}")
|
|
if config.dry_run:
|
|
print(" *** DRY RUN — no requests will be sent ***\n")
|
|
|
|
budget_result = load_budgets(config)
|
|
actuals_result = load_actuals(config)
|
|
check_variance(config)
|
|
check_alerts(config)
|
|
|
|
b_errors = len(budget_result.get("errors", []))
|
|
a_errors = len(actuals_result.get("errors", []))
|
|
|
|
print("\n── Seed Summary ─────────────────────────────────")
|
|
print(f" Budgets : {budget_result['ok']}/{budget_result['total']} ok"
|
|
+ (f" ⚠ {b_errors} errors" if b_errors else " ✅"))
|
|
print(f" Actuals : {actuals_result['total']} records in "
|
|
f"{actuals_result['batches']} batches"
|
|
+ (f" ⚠ {a_errors} errors" if a_errors else " ✅"))
|
|
print()
|
|
return {"budgets": budget_result, "actuals": actuals_result}
|
|
|
|
# ── CLI ───────────────────────────────────────────────────────────────────────
|
|
if __name__ == "__main__":
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(description="Seed FP&A CSV data into Go API")
|
|
parser.add_argument("--url", default="http://localhost:8080", help="API base URL")
|
|
parser.add_argument("--batch", type=int, default=50, help="Actuals batch size")
|
|
parser.add_argument("--dry-run", action="store_true", help="Print payloads, don't send")
|
|
parser.add_argument("--token", default=None, help="Bearer auth token")
|
|
parser.add_argument("--only", choices=["budgets", "actuals", "variance", "alerts"],
|
|
help="Run only one step instead of full seed")
|
|
args = parser.parse_args()
|
|
|
|
cfg = LoaderConfig(
|
|
base_url=args.url,
|
|
batch_size=args.batch,
|
|
dry_run=args.dry_run,
|
|
auth_token=args.token,
|
|
)
|
|
|
|
if args.only == "budgets":
|
|
load_budgets(cfg)
|
|
elif args.only == "actuals":
|
|
load_actuals(cfg)
|
|
elif args.only == "variance":
|
|
print(json.dumps(check_variance(cfg), indent=2))
|
|
elif args.only == "alerts":
|
|
print(json.dumps(check_alerts(cfg), indent=2))
|
|
else:
|
|
seed_all(cfg) |