"""
routes/autosampler_routes.py
-----------------------------
Endpoints for the automated sampler (file-upload based).

Caller uploads a file + params → backend samples → returns sampled rows as JSON.
No persistent state. Each request is fully self-contained.

Note: For the /run endpoint the caller can optionally request a file download
(xlsx or csv) instead of JSON by setting output_format to 'xlsx' or 'csv'.
"""
import io
import json
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from fastapi.responses import StreamingResponse

from functions.file_utils import read_uploaded_file, records_to_excel_bytes, records_to_csv_bytes
from functions.cochran import z_score_from_confidence, cochran_sample_size
from functions.sampling import (
    simple_random_sampling,
    systematic_sampling,
    stratified_sampling,
    cluster_sampling,
    get_cluster_info,
)

# Router instance that the endpoints in this module register on via @router.post(...).
router = APIRouter()


# ── Column Inspector ──────────────────────────────────────────────────────────

@router.post("/columns")
async def get_columns(file: UploadFile = File(...)):
    """
    Report the column names and row count of an uploaded CSV or XLSX file.

    The column list is taken from the keys of the first parsed record; when the
    upload yields no records, the column list is empty and ``rows`` is 0.
    Intended for building the UI before sampling parameters are configured.

    Returns a dict with the keys ``ok``, ``filename``, ``rows`` and ``columns``.
    """
    rows, source_name = await read_uploaded_file(file)

    # Without any record there is nothing to read the header from.
    if rows:
        column_names = list(rows[0].keys())
    else:
        column_names = []

    return {
        "ok": True,
        "filename": source_name,
        "rows": len(rows),
        "columns": column_names,
    }


# ── Cluster Inspector ─────────────────────────────────────────────────────────

@router.post("/cluster-info")
async def cluster_info(
    file: UploadFile = File(...),
    cluster_col: str = Form(...),
    min_size: int = Form(0),
):
    """
    Describe the clusters found in one column of an uploaded file.

    Lets the UI present cluster options before cluster sampling is run.

    Form fields:
        file:        the uploaded data file.
        cluster_col: column whose values define the clusters.
        min_size:    when greater than 0, only clusters whose ``count`` is at
                     least this value are listed as eligible; otherwise every
                     cluster is eligible.

    Raises:
        HTTPException(400): ``cluster_col`` is not a key of the first record
        (this includes the case where the file produced no records).
    """
    rows, _source_name = await read_uploaded_file(file)

    known_columns = rows[0].keys() if rows else []
    if cluster_col not in known_columns:
        raise HTTPException(status_code=400, detail=f"Column '{cluster_col}' not found")

    clusters = get_cluster_info(rows, cluster_col)

    # A non-positive threshold disables filtering altogether.
    if min_size > 0:
        qualifying = [entry for entry in clusters if entry["count"] >= min_size]
    else:
        qualifying = clusters

    return {
        "ok": True,
        "clusters": clusters,
        "eligible": qualifying,
        "total": len(clusters),
        "eligible_count": len(qualifying),
    }


# ── Main Run Endpoint ─────────────────────────────────────────────────────────

# Output formats accepted by the /run endpoint (compared case-insensitively).
_RUN_OUTPUT_FORMATS = ("json", "xlsx", "csv")


def _parse_run_params(params: str) -> dict:
    """Decode the ``params`` form field, raising HTTP 400 unless it is a JSON object."""
    try:
        parsed = json.loads(params)
    except (ValueError, RecursionError) as exc:
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly
        # deep nesting. Both are client errors, not server faults.
        raise HTTPException(status_code=400, detail="Invalid params JSON") from exc
    if not isinstance(parsed, dict):
        # Valid JSON such as `[]`, `5` or `null` would otherwise crash on `.get`.
        raise HTTPException(status_code=400, detail="params must be a JSON object")
    return parsed


def _object_field(container: dict, key: str) -> dict:
    """Return ``container[key]`` as a dict; missing or null becomes ``{}``, other types are HTTP 400."""
    value = container.get(key)
    if value is None:
        return {}
    if not isinstance(value, dict):
        raise HTTPException(status_code=400, detail=f"'{key}' must be a JSON object")
    return value


def _coerce_number(value, cast, field: str):
    """Apply ``cast`` (``int`` or ``float``) to ``value``, mapping conversion failures to HTTP 400."""
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise HTTPException(status_code=400, detail=f"Invalid value for '{field}'") from exc


@router.post("/run")
async def run_autosampler(
    file: UploadFile = File(...),
    params: str = Form("{}"),
    output_format: str = Form("json"),
):
    """
    Full auto-sampler pipeline:
      1. Parse uploaded file
      2. Compute Cochran sample size
      3. Run the chosen sampling method
      4. Return sampled rows as JSON, XLSX, or CSV

    params JSON schema (must be a JSON object):
    {
        "cochran": {
            "z": 1.96,          // Z-score (or confidence level if <= 1)
            "p": 0.5,
            "e": 0.05,
            "N": null           // null / 0 / omitted → use dataset size
        },
        "method": "simple_random",   // simple_random | stratified | cluster | systematic
        "method_params": {
            // stratified: { "strata": [...] }
            // cluster:    { "cluster_column": "...", "mode": "auto", "n_clusters": 5,
            //               "min_cluster_size": 10, "manual_clusters": [...] }
            // systematic: {}
        },
        "random_state": 42,
        "run_by": "Username"    // accepted but not read by this endpoint
    }

    output_format is one of "json" (default), "xlsx" or "csv"; matching is
    case-insensitive and ignores surrounding whitespace.

    Returns:
        For "json": a dict with ``ok``, ``filename``, ``cochran``, ``sampling``
        and ``sampled``. For "xlsx"/"csv": a StreamingResponse carrying the
        sampled rows as an attachment named ``sample.<ext>``.

    Raises:
        HTTPException(400): unknown output_format, empty upload, params that is
        not a JSON object, a non-object "cochran"/"method_params", a value that
        cannot be converted to the expected number, a missing cluster_column
        for cluster sampling, or an unknown method.
    """
    # Reject unsupported formats up front instead of silently falling back to
    # CSV after all the sampling work has been done.
    fmt = output_format.strip().lower()
    if fmt not in _RUN_OUTPUT_FORMATS:
        raise HTTPException(status_code=400, detail=f"Unknown output_format: {output_format}")

    # Parse file
    records, filename = await read_uploaded_file(file)
    if not records:
        raise HTTPException(status_code=400, detail="Uploaded file contains no data")

    # Parse params
    p = _parse_run_params(params)

    # Cochran
    cochran_p = _object_field(p, "cochran")
    # NOTE(review): the schema says z may be a confidence level when <= 1, but z
    # is passed through unchanged here; any conversion would have to happen
    # inside cochran_sample_size — confirm.
    z = _coerce_number(cochran_p.get("z", 1.96), float, "cochran.z")
    prop = _coerce_number(cochran_p.get("p", 0.5), float, "cochran.p")
    e = _coerce_number(cochran_p.get("e", 0.05), float, "cochran.e")
    N_raw = cochran_p.get("N")
    # Any falsy N (null, 0, "") means "use the size of the uploaded dataset".
    N = _coerce_number(N_raw, int, "cochran.N") if N_raw else len(records)

    cochran_result = cochran_sample_size(Z=z, p=prop, E=e, N=N)
    n = cochran_result["recommended_n"]

    # Sampling
    method = p.get("method", "simple_random")
    method_params = _object_field(p, "method_params")
    random_state = _coerce_number(p.get("random_state", 42), int, "random_state")

    if method == "simple_random":
        result = simple_random_sampling(records, n, random_state)

    elif method == "systematic":
        result = systematic_sampling(records, n, random_state)

    elif method == "stratified":
        strata = method_params.get("strata", [])
        result = stratified_sampling(records, n, strata, random_state)

    elif method == "cluster":
        cluster_col = method_params.get("cluster_column", "")
        if not cluster_col:
            raise HTTPException(status_code=400, detail="cluster_column is required for cluster sampling")
        result = cluster_sampling(
            data=records,
            n=n,
            cluster_column=cluster_col,
            mode=method_params.get("mode", "auto"),
            n_clusters=method_params.get("n_clusters"),
            # `or 0` lets an explicit null mean "no minimum".
            min_cluster_size=_coerce_number(
                method_params.get("min_cluster_size", 0) or 0,
                int,
                "method_params.min_cluster_size",
            ),
            manual_clusters=method_params.get("manual_clusters"),
            random_state=random_state,
        )
    else:
        raise HTTPException(status_code=400, detail=f"Unknown method: {method}")

    sampled = result["sampled"]
    info = result["info"]

    # Return as JSON
    if fmt == "json":
        return {
            "ok": True,
            "filename": filename,
            "cochran": cochran_result,
            "sampling": info,
            "sampled": sampled,
        }

    # Return as file download
    if fmt == "xlsx":
        content = records_to_excel_bytes(sampled)
        media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        ext = "xlsx"
    else:
        content = records_to_csv_bytes(sampled)
        media_type = "text/csv"
        ext = "csv"

    return StreamingResponse(
        io.BytesIO(content),
        media_type=media_type,
        headers={"Content-Disposition": f'attachment; filename="sample.{ext}"'},
    )