visualizador_instanciados/backend/app/main.py

from __future__ import annotations

from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware

from .graph_export import edge_retrieval_query, graph_from_sparql_bindings
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
from .rdf_store import RDFStore
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
from .settings import Settings


settings = Settings()


@asynccontextmanager
async def lifespan(app: FastAPI):
    sparql: SparqlEngine = create_sparql_engine(settings)
    await sparql.startup()
    app.state.sparql = sparql

    # Only build node/edge tables when running in rdflib mode.
    if settings.graph_backend == "rdflib":
        assert isinstance(sparql, RdflibEngine)
        if sparql.graph is None:
            raise RuntimeError("rdflib graph failed to load")

        store = RDFStore(
            ttl_path=settings.ttl_path,
            include_bnodes=settings.include_bnodes,
            max_triples=settings.max_triples,
        )
        store.load(sparql.graph)
        app.state.store = store

    yield

    await sparql.shutdown()


app = FastAPI(title="visualizador_instanciados backend", lifespan=lifespan)

cors_origins = settings.cors_origin_list()
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/api/health")
def health() -> dict[str, str]:
    return {"status": "ok"}


@app.get("/api/stats", response_model=StatsResponse)
async def stats() -> StatsResponse:
    sparql: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        store: RDFStore = app.state.store
        return StatsResponse(
            backend=sparql.name,
            ttl_path=settings.ttl_path,
            sparql_endpoint=None,
            parsed_triples=store.parsed_triples,
            nodes=store.node_count,
            edges=store.edge_count,
        )

    # AnzoGraph: compute basic counts via SPARQL.
    assert isinstance(sparql, AnzoGraphEngine)

    def _count_from(result: dict, *, var: str = "count") -> int:
        bindings = (((result.get("results") or {}).get("bindings")) or [])
        if not bindings:
            return 0
        raw = bindings[0].get(var, {}).get("value")
        try:
            return int(raw)
        except Exception:
            return 0

    bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
    nodes_q = f"""
SELECT (COUNT(DISTINCT ?n) AS ?count)
WHERE {{
  {{ ?n ?p ?o }} UNION {{ ?s ?p ?n }}
  FILTER(!isLiteral(?n))
  {bnode_filter}
}}
"""
    triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"

    # Approximate "edges" similarly to our rdflib export: non-literal object, and skip label predicates.
    edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
    edges_q = f"""
SELECT (COUNT(*) AS ?count)
WHERE {{
  ?s ?p ?o .
  FILTER(!isLiteral(?o))
  FILTER(?p NOT IN (
    <http://www.w3.org/2000/01/rdf-schema#label>,
    <http://www.w3.org/2004/02/skos/core#prefLabel>,
    <http://www.w3.org/2004/02/skos/core#altLabel>
  ))
  {edges_bnode_filter}
}}
"""

    triples_res = await sparql.query_json(triples_q)
    nodes_res = await sparql.query_json(nodes_q)
    edges_res = await sparql.query_json(edges_q)

    return StatsResponse(
        backend=sparql.name,
        ttl_path=settings.ttl_path,
        sparql_endpoint=settings.effective_sparql_endpoint(),
        parsed_triples=_count_from(triples_res),
        nodes=_count_from(nodes_res),
        edges=_count_from(edges_res),
    )


@app.post("/api/sparql")
async def sparql_query(req: SparqlQueryRequest) -> dict:
    sparql: SparqlEngine = app.state.sparql
    data = await sparql.query_json(req.query)
    return data


@app.get("/api/nodes", response_model=NodesResponse)
def nodes(
    limit: int = Query(default=10_000, ge=1, le=200_000),
    offset: int = Query(default=0, ge=0),
) -> NodesResponse:
    if settings.graph_backend != "rdflib":
        raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode")
    store: RDFStore = app.state.store
    return NodesResponse(total=store.node_count, nodes=store.node_slice(offset=offset, limit=limit))


@app.get("/api/edges", response_model=EdgesResponse)
def edges(
    limit: int = Query(default=50_000, ge=1, le=500_000),
    offset: int = Query(default=0, ge=0),
) -> EdgesResponse:
    if settings.graph_backend != "rdflib":
        raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode")
    store: RDFStore = app.state.store
    return EdgesResponse(total=store.edge_count, edges=store.edge_slice(offset=offset, limit=limit))


@app.get("/api/graph", response_model=GraphResponse)
async def graph(
    node_limit: int = Query(default=50_000, ge=1, le=200_000),
    edge_limit: int = Query(default=100_000, ge=1, le=500_000),
) -> GraphResponse:
    sparql: SparqlEngine = app.state.sparql

    # Use SPARQL for graph export in BOTH modes so callers don't care which backend is in use.
    edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes)
    res = await sparql.query_json(edges_q)
    bindings = (((res.get("results") or {}).get("bindings")) or [])
    nodes, edges = graph_from_sparql_bindings(
        bindings,
        node_limit=node_limit,
        include_bnodes=settings.include_bnodes,
    )
    return GraphResponse(nodes=nodes, edges=edges)