Reorganiza backend

This commit is contained in:
Oxy8
2026-03-02 17:33:45 -03:00
parent bba0ae887d
commit d4bfa5f064
8 changed files with 419 additions and 124 deletions

View File

@@ -5,10 +5,10 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from .graph_export import edge_retrieval_query, graph_from_sparql_bindings
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
from .pipelines.snapshot_service import GraphSnapshotService
from .rdf_store import RDFStore
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine
from .settings import Settings
@@ -20,6 +20,7 @@ async def lifespan(app: FastAPI):
sparql: SparqlEngine = create_sparql_engine(settings)
await sparql.startup()
app.state.sparql = sparql
app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings)
# Only build node/edge tables when running in rdflib mode.
if settings.graph_backend == "rdflib":
@@ -59,70 +60,17 @@ def health() -> dict[str, str]:
@app.get("/api/stats", response_model=StatsResponse)
async def stats() -> StatsResponse:
sparql: SparqlEngine = app.state.sparql
if settings.graph_backend == "rdflib":
store: RDFStore = app.state.store
return StatsResponse(
backend=sparql.name,
ttl_path=settings.ttl_path,
sparql_endpoint=None,
parsed_triples=store.parsed_triples,
nodes=store.node_count,
edges=store.edge_count,
)
# AnzoGraph: compute basic counts via SPARQL.
assert isinstance(sparql, AnzoGraphEngine)
def _count_from(result: dict, *, var: str = "count") -> int:
bindings = (((result.get("results") or {}).get("bindings")) or [])
if not bindings:
return 0
raw = bindings[0].get(var, {}).get("value")
try:
return int(raw)
except Exception:
return 0
bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
nodes_q = f"""
SELECT (COUNT(DISTINCT ?n) AS ?count)
WHERE {{
{{ ?n ?p ?o }} UNION {{ ?s ?p ?n }}
FILTER(!isLiteral(?n))
{bnode_filter}
}}
"""
triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"
# Approximate "edges" similarly to our rdflib export: non-literal object, and skip label predicates.
edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
edges_q = f"""
SELECT (COUNT(*) AS ?count)
WHERE {{
?s ?p ?o .
FILTER(!isLiteral(?o))
FILTER(?p NOT IN (
<http://www.w3.org/2000/01/rdf-schema#label>,
<http://www.w3.org/2004/02/skos/core#prefLabel>,
<http://www.w3.org/2004/02/skos/core#altLabel>
))
{edges_bnode_filter}
}}
"""
triples_res = await sparql.query_json(triples_q)
nodes_res = await sparql.query_json(nodes_q)
edges_res = await sparql.query_json(edges_q)
# Stats reflect exactly what we send to the frontend (/api/graph), not global graph size.
svc: GraphSnapshotService = app.state.snapshot_service
snap = await svc.get(node_limit=50_000, edge_limit=100_000)
meta = snap.meta
return StatsResponse(
backend=sparql.name,
ttl_path=settings.ttl_path,
sparql_endpoint=settings.effective_sparql_endpoint(),
parsed_triples=_count_from(triples_res),
nodes=_count_from(nodes_res),
edges=_count_from(edges_res),
backend=meta.backend if meta else app.state.sparql.name,
ttl_path=meta.ttl_path if meta and meta.ttl_path else settings.ttl_path,
sparql_endpoint=meta.sparql_endpoint if meta else None,
parsed_triples=len(snap.edges),
nodes=len(snap.nodes),
edges=len(snap.edges),
)
@@ -160,15 +108,5 @@ async def graph(
node_limit: int = Query(default=50_000, ge=1, le=200_000),
edge_limit: int = Query(default=100_000, ge=1, le=500_000),
) -> GraphResponse:
sparql: SparqlEngine = app.state.sparql
# Use SPARQL for graph export in BOTH modes so callers don't care which backend is in use.
edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes)
res = await sparql.query_json(edges_q)
bindings = (((res.get("results") or {}).get("bindings")) or [])
nodes, edges = graph_from_sparql_bindings(
bindings,
node_limit=node_limit,
include_bnodes=settings.include_bnodes,
)
return GraphResponse(nodes=nodes, edges=edges)
svc: GraphSnapshotService = app.state.snapshot_service
return await svc.get(node_limit=node_limit, edge_limit=edge_limit)