248 lines
7.6 KiB
Python
248 lines
7.6 KiB
Python
from __future__ import annotations
|
|
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI, HTTPException, Query
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
|
from .rdf_store import RDFStore
|
|
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
|
|
from .settings import Settings
|
|
|
|
|
|
# Module-wide configuration object, created once at import time. The lifespan
# hook and the route handlers in this module read it (graph_backend, ttl_path,
# include_bnodes, max_triples, CORS origins, SPARQL endpoint).
# NOTE(review): presumably populated from environment variables — confirm in
# the Settings class.
settings = Settings()
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the SPARQL engine (and, in rdflib mode, the RDF store) for the app.

    On startup the engine is created from ``settings`` and started, then
    published as ``app.state.sparql``. When ``settings.graph_backend`` is
    ``"rdflib"`` an ``RDFStore`` is additionally built from the engine's graph
    and published as ``app.state.store``.

    The engine is shut down when the context exits, including when store
    construction fails after a successful ``startup()`` or when an exception
    is thrown into the context while the application is being served.

    Raises:
        RuntimeError: in rdflib mode, if the engine has no loaded graph.
    """
    sparql: SparqlEngine = create_sparql_engine(settings)
    await sparql.startup()

    # Everything after a successful startup is guarded so the engine is always
    # released, whichever step below fails.
    try:
        app.state.sparql = sparql

        # Only build node/edge tables when running in rdflib mode.
        if settings.graph_backend == "rdflib":
            assert isinstance(sparql, RdflibEngine)
            if sparql.graph is None:
                raise RuntimeError("rdflib graph failed to load")

            store = RDFStore(
                ttl_path=settings.ttl_path,
                include_bnodes=settings.include_bnodes,
                max_triples=settings.max_triples,
            )
            store.load(sparql.graph)
            app.state.store = store

        yield
    finally:
        await sparql.shutdown()
|
|
|
|
|
|
# Origins permitted to call this API from a browser, as reported by the settings.
cors_origins = settings.cors_origin_list()

# The ASGI application; startup/shutdown work is delegated to ``lifespan``.
app = FastAPI(title="visualizador_instanciados backend", lifespan=lifespan)

# CORS: any method and any header from the configured origins, without credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
@app.get("/api/health")
def health() -> dict[str, str]:
    """Return a fixed ``{"status": "ok"}`` payload.

    The handler consults neither the SPARQL engine nor the RDF store.
    """
    payload: dict[str, str] = {"status": "ok"}
    return payload
|
|
|
|
|
|
@app.get("/api/stats", response_model=StatsResponse)
async def stats() -> StatsResponse:
    """Report triple, node and edge counts for the active backend.

    In rdflib mode the numbers come straight from the in-memory ``RDFStore``.
    Otherwise three SPARQL COUNT queries are sent, one after another, to the
    engine and the first binding of each result is read as an integer.
    """
    engine: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        rdf_store: RDFStore = app.state.store
        return StatsResponse(
            backend=engine.name,
            ttl_path=settings.ttl_path,
            sparql_endpoint=None,
            parsed_triples=rdf_store.parsed_triples,
            nodes=rdf_store.node_count,
            edges=rdf_store.edge_count,
        )

    # AnzoGraph: compute basic counts via SPARQL.
    assert isinstance(engine, AnzoGraphEngine)

    def _first_count(payload: dict, *, var: str = "count") -> int:
        """Read ``var`` from the first binding of a SPARQL JSON result; 0 if absent or not an int."""
        rows = (payload.get("results") or {}).get("bindings") or []
        if not rows:
            return 0
        text = rows[0].get(var, {}).get("value")
        # Best effort: a missing or non-numeric value counts as zero.
        try:
            number = int(text)
        except Exception:
            return 0
        return number

    node_blank_clause = "FILTER(!isBlank(?n))" if not settings.include_bnodes else ""
    node_count_query = f"""
SELECT (COUNT(DISTINCT ?n) AS ?count)
WHERE {{
{{ ?n ?p ?o }} UNION {{ ?s ?p ?n }}
FILTER(!isLiteral(?n))
{node_blank_clause}
}}
"""
    triple_count_query = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"

    # Approximate "edges" similarly to the rdflib export: the object must not be
    # a literal, and label predicates are skipped.
    edge_blank_clause = (
        "FILTER(!isBlank(?s) && !isBlank(?o))" if not settings.include_bnodes else ""
    )
    edge_count_query = f"""
SELECT (COUNT(*) AS ?count)
WHERE {{
?s ?p ?o .
FILTER(!isLiteral(?o))
FILTER(?p NOT IN (
<http://www.w3.org/2000/01/rdf-schema#label>,
<http://www.w3.org/2004/02/skos/core#prefLabel>,
<http://www.w3.org/2004/02/skos/core#altLabel>
))
{edge_blank_clause}
}}
"""

    # Queries run sequentially, in this order: triples, nodes, edges.
    triple_payload, node_payload, edge_payload = [
        await engine.query_json(query)
        for query in (triple_count_query, node_count_query, edge_count_query)
    ]

    return StatsResponse(
        backend=engine.name,
        ttl_path=settings.ttl_path,
        sparql_endpoint=settings.effective_sparql_endpoint(),
        parsed_triples=_first_count(triple_payload),
        nodes=_first_count(node_payload),
        edges=_first_count(edge_payload),
    )
|
|
|
|
|
|
@app.post("/api/sparql")
async def sparql_query(req: SparqlQueryRequest) -> dict:
    """Run the caller-supplied SPARQL query and return the engine's JSON result.

    ``req.query`` is handed to the engine unchanged; no validation or
    rewriting happens in this handler.
    """
    engine: SparqlEngine = app.state.sparql
    return await engine.query_json(req.query)
|
|
|
|
|
|
@app.get("/api/nodes", response_model=NodesResponse)
def nodes(
    limit: int = Query(default=10_000, ge=1, le=200_000),
    offset: int = Query(default=0, ge=0),
) -> NodesResponse:
    """Return one page of nodes from the in-memory RDF store.

    Args:
        limit: page size (1 to 200,000).
        offset: index of the first node to return (>= 0).

    Raises:
        HTTPException: 501 when the backend is not rdflib.
    """
    rdflib_mode = settings.graph_backend == "rdflib"
    if not rdflib_mode:
        raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode")

    rdf_store: RDFStore = app.state.store
    node_total = rdf_store.node_count
    node_page = rdf_store.node_slice(offset=offset, limit=limit)
    return NodesResponse(total=node_total, nodes=node_page)
|
|
|
|
|
|
@app.get("/api/edges", response_model=EdgesResponse)
def edges(
    limit: int = Query(default=50_000, ge=1, le=500_000),
    offset: int = Query(default=0, ge=0),
) -> EdgesResponse:
    """Return one page of edges from the in-memory RDF store.

    Args:
        limit: page size (1 to 500,000).
        offset: index of the first edge to return (>= 0).

    Raises:
        HTTPException: 501 when the backend is not rdflib.
    """
    rdflib_mode = settings.graph_backend == "rdflib"
    if not rdflib_mode:
        raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode")

    rdf_store: RDFStore = app.state.store
    edge_total = rdf_store.edge_count
    edge_page = rdf_store.edge_slice(offset=offset, limit=limit)
    return EdgesResponse(total=edge_total, edges=edge_page)
|
|
|
|
|
|
@app.get("/api/graph", response_model=GraphResponse)
async def graph(
    node_limit: int = Query(default=50_000, ge=1, le=200_000),
    edge_limit: int = Query(default=100_000, ge=1, le=500_000),
) -> GraphResponse:
    """Return a node/edge subgraph for the active backend.

    In rdflib mode the first ``node_limit`` nodes and first ``edge_limit``
    edges of the in-memory store are returned.

    Otherwise a SPARQL query fetches up to ``edge_limit`` triples whose object
    is not a literal and whose predicate is not a label predicate. Node ids are
    assigned in order of first appearance, subject before object, until
    ``node_limit`` nodes exist. A triple only becomes an edge when both of its
    endpoints have ids and its predicate value is non-empty. Endpoints are
    registered before that check, so a node can be returned without any edge.

    Args:
        node_limit: maximum number of nodes in the response (1 to 200,000).
        edge_limit: maximum number of edges / fetched triples (1 to 500,000).
    """
    engine: SparqlEngine = app.state.sparql

    if settings.graph_backend == "rdflib":
        rdf_store: RDFStore = app.state.store
        node_page = rdf_store.node_slice(offset=0, limit=node_limit)
        edge_page = rdf_store.edge_slice(offset=0, limit=edge_limit)
        return GraphResponse(nodes=node_page, edges=edge_page)

    # AnzoGraph mode: return a simple subgraph by pulling the first N triples.
    assert isinstance(engine, AnzoGraphEngine)

    blank_clause = (
        "FILTER(!isBlank(?s) && !isBlank(?o))" if not settings.include_bnodes else ""
    )
    triples_query = f"""
SELECT ?s ?p ?o
WHERE {{
?s ?p ?o .
FILTER(!isLiteral(?o))
FILTER(?p NOT IN (
<http://www.w3.org/2000/01/rdf-schema#label>,
<http://www.w3.org/2004/02/skos/core#prefLabel>,
<http://www.w3.org/2004/02/skos/core#altLabel>
))
{blank_clause}
}}
LIMIT {edge_limit}
"""

    payload = await engine.query_json(triples_query)
    rows = (payload.get("results") or {}).get("bindings") or []

    index_of: dict[tuple[str, str], int] = {}
    descriptors: list[tuple[str, str]] = []  # (termType, iri), indexed by node id
    edge_records: list[dict[str, object]] = []

    def _node_index(term: dict[str, str]) -> int | None:
        """Return the id for ``term``, allocating one if needed; None if it cannot be a node."""
        kind = term.get("type")
        value = term.get("value")
        if not kind or value is None or kind == "literal":
            return None

        if kind == "bnode":
            if not settings.include_bnodes:
                return None
            key = ("bnode", value)
            descriptor = ("bnode", f"_:{value}")
        else:
            # Any other term type is treated as a URI.
            key = descriptor = ("uri", value)

        if key in index_of:
            return index_of[key]
        # Known nodes stay usable once the limit is hit; only new ones are refused.
        if len(descriptors) >= node_limit:
            return None

        new_id = len(descriptors)
        index_of[key] = new_id
        descriptors.append(descriptor)
        return new_id

    for row in rows:
        subject_term = row.get("s") or {}
        object_term = row.get("o") or {}
        predicate_term = row.get("p") or {}

        # Both endpoints are always looked up (subject first), even when the
        # subject is rejected.
        source_id = _node_index(subject_term)
        target_id = _node_index(object_term)

        if source_id is not None and target_id is not None:
            predicate_iri = predicate_term.get("value")
            if predicate_iri:
                edge_records.append(
                    {"source": source_id, "target": target_id, "predicate": predicate_iri}
                )

    node_records = [
        {"id": node_id, "termType": kind, "iri": iri, "label": None}
        for node_id, (kind, iri) in enumerate(descriptors)
    ]

    return GraphResponse(nodes=node_records, edges=edge_records)
|