"""HTTP API for the visualizador_instanciados backend.

Exposes health, statistics, raw SPARQL and graph-export endpoints on top of
the SPARQL engine returned by ``create_sparql_engine(settings)``. The
``/api/nodes`` and ``/api/edges`` endpoints are only available when
``settings.graph_backend == "rdflib"``; ``/api/stats`` and ``/api/graph``
fall back to SPARQL queries for the other backend.
"""

from __future__ import annotations

from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware

from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
from .rdf_store import RDFStore
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
from .settings import Settings

settings = Settings()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the SPARQL engine for the app's lifetime and shut it down after.

    Stores the engine on ``app.state.sparql``. In rdflib mode it also builds
    an ``RDFStore`` from the engine's graph and stores it on
    ``app.state.store``.

    Raises:
        RuntimeError: in rdflib mode, when the engine has no graph after
            startup.
    """
    sparql: SparqlEngine = create_sparql_engine(settings)
    await sparql.startup()
    # Everything after a successful startup is guarded so the engine is shut
    # down even when store construction fails or the app exits with an error.
    try:
        app.state.sparql = sparql

        # Only build node/edge tables when running in rdflib mode.
        if settings.graph_backend == "rdflib":
            assert isinstance(sparql, RdflibEngine)
            if sparql.graph is None:
                raise RuntimeError("rdflib graph failed to load")

            store = RDFStore(
                ttl_path=settings.ttl_path,
                include_bnodes=settings.include_bnodes,
                max_triples=settings.max_triples,
            )
            store.load(sparql.graph)
            app.state.store = store

        yield
    finally:
        await sparql.shutdown()


app = FastAPI(title="visualizador_instanciados backend", lifespan=lifespan)

cors_origins = settings.cors_origin_list()
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _bindings(result: dict) -> list[dict]:
    """Return ``result["results"]["bindings"]``, or ``[]`` if absent/empty."""
    return ((result.get("results") or {}).get("bindings")) or []


def _count_from(result: dict, *, var: str = "count") -> int:
    """Read an integer from variable *var* of the first binding of *result*.

    Returns 0 when there are no bindings, when the variable has no value, or
    when the value cannot be converted with ``int()``.
    """
    bindings = _bindings(result)
    if not bindings:
        return 0
    raw = bindings[0].get(var, {}).get("value")
    try:
        return int(raw)
    except (TypeError, ValueError):
        # TypeError: value missing (None); ValueError: not an integer string.
        return 0


def _edge_where_clause() -> str:
    """Return the SPARQL group pattern shared by the edge count and export.

    Approximates "edges" similarly to the rdflib export: non-literal object,
    and skip label predicates. Blank-node endpoints are excluded unless
    ``settings.include_bnodes`` is set.
    """
    bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
    # NOTE(review): the NOT IN list below carries no IRIs in the text under
    # review; confirm the intended label-predicate IRIs are present.
    return (
        "{ ?s ?p ?o . FILTER(!isLiteral(?o)) FILTER(?p NOT IN ( , , )) "
        f"{bnode_filter} }}"
    )


def _term_key_and_meta(
    term: dict[str, str], *, include_bnodes: bool
) -> tuple[tuple[str, str], tuple[str, str]] | None:
    """Map a SPARQL JSON term to ``(dedup_key, (termType, iri))``.

    Returns ``None`` for terms that cannot become nodes: a missing type or
    value, literals, and blank nodes when *include_bnodes* is false. Blank
    nodes get an ``_:`` prefix on the reported iri; every other type is
    treated as ``"uri"``.
    """
    term_type = term.get("type")
    value = term.get("value")
    if not term_type or value is None:
        return None
    if term_type == "literal":
        return None
    if term_type == "bnode":
        if not include_bnodes:
            return None
        return ("bnode", value), ("bnode", f"_:{value}")
    # Default to "uri".
    return ("uri", value), ("uri", value)


def _subgraph_from_bindings(
    bindings: list[dict], *, node_limit: int, include_bnodes: bool
) -> tuple[list[dict[str, object]], list[dict[str, object]]]:
    """Build ``(nodes, edges)`` dicts from ``?s ?p ?o`` result bindings.

    Nodes receive consecutive integer ids in order of first appearance
    (subject before object). An edge is kept only when it has a predicate
    value, both endpoints are usable nodes, and registering its new endpoints
    keeps the node count within *node_limit*. Endpoints are registered only
    for kept edges, so dropped edges never consume node slots.
    """
    node_id_by_key: dict[tuple[str, str], int] = {}
    node_meta: list[tuple[str, str]] = []  # (termType, iri)
    out_edges: list[dict[str, object]] = []

    def _register(key: tuple[str, str], meta: tuple[str, str]) -> int:
        nid = node_id_by_key.get(key)
        if nid is None:
            nid = len(node_meta)
            node_id_by_key[key] = nid
            node_meta.append(meta)
        return nid

    for binding in bindings:
        pred = (binding.get("p") or {}).get("value")
        if not pred:
            continue

        subject = _term_key_and_meta(binding.get("s") or {}, include_bnodes=include_bnodes)
        obj = _term_key_and_meta(binding.get("o") or {}, include_bnodes=include_bnodes)
        if subject is None or obj is None:
            continue

        # A set so that a self-loop on an unseen node counts as one new node.
        unseen = {key for key, _ in (subject, obj) if key not in node_id_by_key}
        if len(node_meta) + len(unseen) > node_limit:
            continue

        sid = _register(*subject)
        oid = _register(*obj)
        out_edges.append({"source": sid, "target": oid, "predicate": pred})

    out_nodes: list[dict[str, object]] = [
        {"id": i, "termType": term_type, "iri": iri, "label": None}
        for i, (term_type, iri) in enumerate(node_meta)
    ]
    return out_nodes, out_edges


@app.get("/api/health")
def health() -> dict[str, str]:
    """Liveness probe; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/api/stats", response_model=StatsResponse)
async def stats() -> StatsResponse:
    """Return triple, node and edge counts for the active backend.

    In rdflib mode the counts come from the ``RDFStore`` on ``app.state``;
    otherwise they are computed with three SPARQL COUNT queries.
    """
    sparql: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        store: RDFStore = app.state.store
        return StatsResponse(
            backend=sparql.name,
            ttl_path=settings.ttl_path,
            sparql_endpoint=None,
            parsed_triples=store.parsed_triples,
            nodes=store.node_count,
            edges=store.edge_count,
        )

    # AnzoGraph: compute basic counts via SPARQL.
    assert isinstance(sparql, AnzoGraphEngine)

    bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
    nodes_q = (
        " SELECT (COUNT(DISTINCT ?n) AS ?count) WHERE {"
        " { ?n ?p ?o } UNION { ?s ?p ?n }"
        " FILTER(!isLiteral(?n))"
        f" {bnode_filter} }} "
    )
    triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"
    edges_q = f" SELECT (COUNT(*) AS ?count) WHERE {_edge_where_clause()} "

    triples_res = await sparql.query_json(triples_q)
    nodes_res = await sparql.query_json(nodes_q)
    edges_res = await sparql.query_json(edges_q)

    return StatsResponse(
        backend=sparql.name,
        ttl_path=settings.ttl_path,
        sparql_endpoint=settings.effective_sparql_endpoint(),
        parsed_triples=_count_from(triples_res),
        nodes=_count_from(nodes_res),
        edges=_count_from(edges_res),
    )


@app.post("/api/sparql")
async def sparql_query(req: SparqlQueryRequest) -> dict:
    """Run ``req.query`` on the active engine and return its JSON result."""
    sparql: SparqlEngine = app.state.sparql
    data = await sparql.query_json(req.query)
    return data


@app.get("/api/nodes", response_model=NodesResponse)
def nodes(
    limit: int = Query(default=10_000, ge=1, le=200_000),
    offset: int = Query(default=0, ge=0),
) -> NodesResponse:
    """Return a page of nodes from the rdflib store plus the total node count.

    Raises:
        HTTPException: 501 when the backend is not rdflib.
    """
    if settings.graph_backend != "rdflib":
        raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode")
    store: RDFStore = app.state.store
    return NodesResponse(total=store.node_count, nodes=store.node_slice(offset=offset, limit=limit))


@app.get("/api/edges", response_model=EdgesResponse)
def edges(
    limit: int = Query(default=50_000, ge=1, le=500_000),
    offset: int = Query(default=0, ge=0),
) -> EdgesResponse:
    """Return a page of edges from the rdflib store plus the total edge count.

    Raises:
        HTTPException: 501 when the backend is not rdflib.
    """
    if settings.graph_backend != "rdflib":
        raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode")
    store: RDFStore = app.state.store
    return EdgesResponse(total=store.edge_count, edges=store.edge_slice(offset=offset, limit=limit))


@app.get("/api/graph", response_model=GraphResponse)
async def graph(
    node_limit: int = Query(default=50_000, ge=1, le=200_000),
    edge_limit: int = Query(default=100_000, ge=1, le=500_000),
) -> GraphResponse:
    """Return up to *node_limit* nodes and *edge_limit* edges of the graph.

    In rdflib mode both lists are the leading slices of the store's tables.
    Otherwise a subgraph is assembled from the first *edge_limit* matching
    triples returned by the SPARQL engine.
    """
    sparql: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        store: RDFStore = app.state.store
        return GraphResponse(
            nodes=store.node_slice(offset=0, limit=node_limit),
            edges=store.edge_slice(offset=0, limit=edge_limit),
        )

    # AnzoGraph mode: return a simple subgraph by pulling the first N triples.
    assert isinstance(sparql, AnzoGraphEngine)

    edges_q = f" SELECT ?s ?p ?o WHERE {_edge_where_clause()} LIMIT {edge_limit} "
    res = await sparql.query_json(edges_q)

    out_nodes, out_edges = _subgraph_from_bindings(
        _bindings(res),
        node_limit=node_limit,
        include_bnodes=settings.include_bnodes,
    )
    return GraphResponse(nodes=out_nodes, edges=out_edges)