Reorganiza backend
This commit is contained in:
@@ -5,10 +5,10 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from .graph_export import edge_retrieval_query, graph_from_sparql_bindings
|
||||
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
||||
from .pipelines.snapshot_service import GraphSnapshotService
|
||||
from .rdf_store import RDFStore
|
||||
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
|
||||
from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine
|
||||
from .settings import Settings
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ async def lifespan(app: FastAPI):
|
||||
sparql: SparqlEngine = create_sparql_engine(settings)
|
||||
await sparql.startup()
|
||||
app.state.sparql = sparql
|
||||
app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings)
|
||||
|
||||
# Only build node/edge tables when running in rdflib mode.
|
||||
if settings.graph_backend == "rdflib":
|
||||
@@ -59,70 +60,17 @@ def health() -> dict[str, str]:
|
||||
|
||||
@app.get("/api/stats", response_model=StatsResponse)
|
||||
async def stats() -> StatsResponse:
|
||||
sparql: SparqlEngine = app.state.sparql
|
||||
|
||||
if settings.graph_backend == "rdflib":
|
||||
store: RDFStore = app.state.store
|
||||
return StatsResponse(
|
||||
backend=sparql.name,
|
||||
ttl_path=settings.ttl_path,
|
||||
sparql_endpoint=None,
|
||||
parsed_triples=store.parsed_triples,
|
||||
nodes=store.node_count,
|
||||
edges=store.edge_count,
|
||||
)
|
||||
|
||||
# AnzoGraph: compute basic counts via SPARQL.
|
||||
assert isinstance(sparql, AnzoGraphEngine)
|
||||
|
||||
def _count_from(result: dict, *, var: str = "count") -> int:
|
||||
bindings = (((result.get("results") or {}).get("bindings")) or [])
|
||||
if not bindings:
|
||||
return 0
|
||||
raw = bindings[0].get(var, {}).get("value")
|
||||
try:
|
||||
return int(raw)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
|
||||
nodes_q = f"""
|
||||
SELECT (COUNT(DISTINCT ?n) AS ?count)
|
||||
WHERE {{
|
||||
{{ ?n ?p ?o }} UNION {{ ?s ?p ?n }}
|
||||
FILTER(!isLiteral(?n))
|
||||
{bnode_filter}
|
||||
}}
|
||||
"""
|
||||
triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"
|
||||
|
||||
# Approximate "edges" similarly to our rdflib export: non-literal object, and skip label predicates.
|
||||
edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
||||
edges_q = f"""
|
||||
SELECT (COUNT(*) AS ?count)
|
||||
WHERE {{
|
||||
?s ?p ?o .
|
||||
FILTER(!isLiteral(?o))
|
||||
FILTER(?p NOT IN (
|
||||
<http://www.w3.org/2000/01/rdf-schema#label>,
|
||||
<http://www.w3.org/2004/02/skos/core#prefLabel>,
|
||||
<http://www.w3.org/2004/02/skos/core#altLabel>
|
||||
))
|
||||
{edges_bnode_filter}
|
||||
}}
|
||||
"""
|
||||
|
||||
triples_res = await sparql.query_json(triples_q)
|
||||
nodes_res = await sparql.query_json(nodes_q)
|
||||
edges_res = await sparql.query_json(edges_q)
|
||||
|
||||
# Stats reflect exactly what we send to the frontend (/api/graph), not global graph size.
|
||||
svc: GraphSnapshotService = app.state.snapshot_service
|
||||
snap = await svc.get(node_limit=50_000, edge_limit=100_000)
|
||||
meta = snap.meta
|
||||
return StatsResponse(
|
||||
backend=sparql.name,
|
||||
ttl_path=settings.ttl_path,
|
||||
sparql_endpoint=settings.effective_sparql_endpoint(),
|
||||
parsed_triples=_count_from(triples_res),
|
||||
nodes=_count_from(nodes_res),
|
||||
edges=_count_from(edges_res),
|
||||
backend=meta.backend if meta else app.state.sparql.name,
|
||||
ttl_path=meta.ttl_path if meta and meta.ttl_path else settings.ttl_path,
|
||||
sparql_endpoint=meta.sparql_endpoint if meta else None,
|
||||
parsed_triples=len(snap.edges),
|
||||
nodes=len(snap.nodes),
|
||||
edges=len(snap.edges),
|
||||
)
|
||||
|
||||
|
||||
@@ -160,15 +108,5 @@ async def graph(
|
||||
node_limit: int = Query(default=50_000, ge=1, le=200_000),
|
||||
edge_limit: int = Query(default=100_000, ge=1, le=500_000),
|
||||
) -> GraphResponse:
|
||||
sparql: SparqlEngine = app.state.sparql
|
||||
|
||||
# Use SPARQL for graph export in BOTH modes so callers don't care which backend is in use.
|
||||
edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes)
|
||||
res = await sparql.query_json(edges_q)
|
||||
bindings = (((res.get("results") or {}).get("bindings")) or [])
|
||||
nodes, edges = graph_from_sparql_bindings(
|
||||
bindings,
|
||||
node_limit=node_limit,
|
||||
include_bnodes=settings.include_bnodes,
|
||||
)
|
||||
return GraphResponse(nodes=nodes, edges=edges)
|
||||
svc: GraphSnapshotService = app.state.snapshot_service
|
||||
return await svc.get(node_limit=node_limit, edge_limit=edge_limit)
|
||||
|
||||
Reference in New Issue
Block a user