Files
visualizador_instanciados/backend/app/main.py
2026-03-02 14:32:42 -03:00

248 lines
7.6 KiB
Python

from __future__ import annotations
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
from .rdf_store import RDFStore
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
from .settings import Settings
# Module-wide settings instance, created once at import time and read by the
# lifespan hook and the route handlers in this module.
settings = Settings()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the SPARQL engine for the app's lifetime and stop it afterwards.

    The engine is created from the module-level ``settings``, started, and
    published as ``app.state.sparql``. In rdflib mode an ``RDFStore`` is also
    built from the engine's graph and published as ``app.state.store``.

    Raises:
        RuntimeError: in rdflib mode, when the created engine is not an
            ``RdflibEngine`` or its graph is ``None``.
    """
    sparql: SparqlEngine = create_sparql_engine(settings)
    await sparql.startup()
    # From here on the engine is running, so shutdown must happen even when
    # building the store fails or the generator is cancelled at ``yield``.
    try:
        app.state.sparql = sparql

        # Only build node/edge tables when running in rdflib mode.
        if settings.graph_backend == "rdflib":
            # Explicit check instead of ``assert``: asserts are removed under
            # ``python -O`` and the ``.graph`` access below needs this type.
            if not isinstance(sparql, RdflibEngine):
                raise RuntimeError("graph_backend is 'rdflib' but the SPARQL engine is not an RdflibEngine")
            if sparql.graph is None:
                raise RuntimeError("rdflib graph failed to load")

            store = RDFStore(
                ttl_path=settings.ttl_path,
                include_bnodes=settings.include_bnodes,
                max_triples=settings.max_triples,
            )
            store.load(sparql.graph)
            app.state.store = store

        yield
    finally:
        await sparql.shutdown()
# ASGI application; startup and shutdown work is delegated to ``lifespan``.
app = FastAPI(title="visualizador_instanciados backend", lifespan=lifespan)

# Allowed origins come from settings. Credentials are disabled, while every
# HTTP method and request header is accepted.
cors_origins = settings.cors_origin_list()
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/api/health")
def health() -> dict[str, str]:
    """Liveness probe: always answers with a fixed ``status`` of ``ok``."""
    payload: dict[str, str] = {"status": "ok"}
    return payload
@app.get("/api/stats", response_model=StatsResponse)
async def stats() -> StatsResponse:
    """Report triple, node and edge counts for the active graph backend.

    In rdflib mode the counts are read from the store on ``app.state``.
    Otherwise three SPARQL ``COUNT`` queries are sent to the engine and the
    first binding of each result is used.
    """
    sparql: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        store: RDFStore = app.state.store
        return StatsResponse(
            backend=sparql.name,
            ttl_path=settings.ttl_path,
            sparql_endpoint=None,
            parsed_triples=store.parsed_triples,
            nodes=store.node_count,
            edges=store.edge_count,
        )

    # AnzoGraph: compute basic counts via SPARQL.
    assert isinstance(sparql, AnzoGraphEngine)

    def _count_from(result: dict, *, var: str = "count") -> int:
        """Return the integer bound to ``var`` in the first row, or 0.

        0 is returned when there are no rows, the variable is unbound or
        null, or its value cannot be converted to ``int``.
        """
        bindings = (result.get("results") or {}).get("bindings") or []
        if not bindings:
            return 0
        # ``or {}`` also covers a binding that is present but null.
        raw = (bindings[0].get(var) or {}).get("value")
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            # Only the conversion errors are treated as "no count".
            return 0

    # The queries are assembled line by line so that the text sent to the
    # engine does not depend on how this function body is indented.
    bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
    nodes_q = "\n".join(
        [
            "",
            "SELECT (COUNT(DISTINCT ?n) AS ?count)",
            "WHERE {",
            "{ ?n ?p ?o } UNION { ?s ?p ?n }",
            "FILTER(!isLiteral(?n))",
            bnode_filter,
            "}",
            "",
        ]
    )

    triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"

    # Approximate "edges" similarly to our rdflib export: non-literal object, and skip label predicates.
    edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
    edges_q = "\n".join(
        [
            "",
            "SELECT (COUNT(*) AS ?count)",
            "WHERE {",
            "?s ?p ?o .",
            "FILTER(!isLiteral(?o))",
            "FILTER(?p NOT IN (",
            "<http://www.w3.org/2000/01/rdf-schema#label>,",
            "<http://www.w3.org/2004/02/skos/core#prefLabel>,",
            "<http://www.w3.org/2004/02/skos/core#altLabel>",
            "))",
            edges_bnode_filter,
            "}",
            "",
        ]
    )

    triples_res = await sparql.query_json(triples_q)
    nodes_res = await sparql.query_json(nodes_q)
    edges_res = await sparql.query_json(edges_q)

    return StatsResponse(
        backend=sparql.name,
        ttl_path=settings.ttl_path,
        sparql_endpoint=settings.effective_sparql_endpoint(),
        parsed_triples=_count_from(triples_res),
        nodes=_count_from(nodes_res),
        edges=_count_from(edges_res),
    )
@app.post("/api/sparql")
async def sparql_query(req: SparqlQueryRequest) -> dict:
    """Run the query text carried by ``req`` on the active engine.

    The engine's JSON result is returned to the client unchanged.
    """
    engine: SparqlEngine = app.state.sparql
    return await engine.query_json(req.query)
@app.get("/api/nodes", response_model=NodesResponse)
def nodes(
    limit: int = Query(default=10_000, ge=1, le=200_000),
    offset: int = Query(default=0, ge=0),
) -> NodesResponse:
    """Return one page of nodes plus the total node count (rdflib mode only).

    Raises:
        HTTPException: 501 when the configured graph backend is not rdflib.
    """
    if settings.graph_backend == "rdflib":
        rdf_store: RDFStore = app.state.store
        total = rdf_store.node_count
        page = rdf_store.node_slice(offset=offset, limit=limit)
        return NodesResponse(total=total, nodes=page)

    raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode")
@app.get("/api/edges", response_model=EdgesResponse)
def edges(
    limit: int = Query(default=50_000, ge=1, le=500_000),
    offset: int = Query(default=0, ge=0),
) -> EdgesResponse:
    """Return one page of edges plus the total edge count (rdflib mode only).

    Raises:
        HTTPException: 501 when the configured graph backend is not rdflib.
    """
    if settings.graph_backend == "rdflib":
        rdf_store: RDFStore = app.state.store
        total = rdf_store.edge_count
        page = rdf_store.edge_slice(offset=offset, limit=limit)
        return EdgesResponse(total=total, edges=page)

    raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode")
@app.get("/api/graph", response_model=GraphResponse)
async def graph(
    node_limit: int = Query(default=50_000, ge=1, le=200_000),
    edge_limit: int = Query(default=100_000, ge=1, le=500_000),
) -> GraphResponse:
    """Return a node/edge subgraph capped by ``node_limit`` and ``edge_limit``.

    In rdflib mode the leading slices of the store on ``app.state`` are
    returned. Otherwise up to ``edge_limit`` triples are fetched over SPARQL
    and their subjects and objects are numbered in order of first appearance,
    stopping at ``node_limit`` distinct nodes.
    """
    engine: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        rdf_store: RDFStore = app.state.store
        return GraphResponse(
            nodes=rdf_store.node_slice(offset=0, limit=node_limit),
            edges=rdf_store.edge_slice(offset=0, limit=edge_limit),
        )

    # AnzoGraph mode: return a simple subgraph by pulling the first N triples.
    assert isinstance(engine, AnzoGraphEngine)

    if settings.include_bnodes:
        edges_bnode_filter = ""
    else:
        edges_bnode_filter = "FILTER(!isBlank(?s) && !isBlank(?o))"

    # Query text assembled line by line; the leading and trailing empty
    # entries produce the newline at each end of the query.
    edges_q = "\n".join(
        [
            "",
            "SELECT ?s ?p ?o",
            "WHERE {",
            "?s ?p ?o .",
            "FILTER(!isLiteral(?o))",
            "FILTER(?p NOT IN (",
            "<http://www.w3.org/2000/01/rdf-schema#label>,",
            "<http://www.w3.org/2004/02/skos/core#prefLabel>,",
            "<http://www.w3.org/2004/02/skos/core#altLabel>",
            "))",
            edges_bnode_filter,
            "}",
            f"LIMIT {edge_limit}",
            "",
        ]
    )

    payload = await engine.query_json(edges_q)
    rows = (payload.get("results") or {}).get("bindings") or []

    index_of: dict[tuple[str, str], int] = {}
    nodes_seen: list[tuple[str, str]] = []  # (termType, iri), position == node id
    edge_payload: list[dict[str, object]] = []

    def _node_id(term: dict[str, str]) -> int | None:
        """Return the id for ``term``, registering it on first sight.

        ``None`` means the term is not usable as a node (missing type or
        value, a literal, or a blank node while blank nodes are excluded) or
        that it is new and ``node_limit`` has already been reached.
        """
        kind = term.get("type")
        value = term.get("value")
        if not kind or value is None or kind == "literal":
            return None

        if kind == "bnode":
            if not settings.include_bnodes:
                return None
            key = ("bnode", value)
            entry = ("bnode", f"_:{value}")
        else:
            # Default to "uri".
            key = entry = ("uri", value)

        if key in index_of:
            return index_of[key]
        if len(nodes_seen) >= node_limit:
            return None

        new_id = len(nodes_seen)
        index_of[key] = new_id
        nodes_seen.append(entry)
        return new_id

    for row in rows:
        subject = row.get("s") or {}
        obj = row.get("o") or {}
        predicate_term = row.get("p") or {}

        # Both endpoints are resolved before either result is inspected, so a
        # subject can be registered even when the edge itself is dropped.
        source_id = _node_id(subject)
        target_id = _node_id(obj)
        if source_id is None or target_id is None:
            continue

        predicate = predicate_term.get("value")
        if predicate:
            edge_payload.append({"source": source_id, "target": target_id, "predicate": predicate})

    node_payload = []
    for idx, (term_type, iri) in enumerate(nodes_seen):
        node_payload.append({"id": idx, "termType": term_type, "iri": iri, "label": None})

    return GraphResponse(nodes=node_payload, edges=edge_payload)