from __future__ import annotations

from typing import Any

from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
from ..models import GraphResponse
from ..settings import Settings
from ..sparql_engine import SparqlEngine
from .layout_dag_radial import CycleError, level_synchronous_kahn_layers, radial_positions_from_layers

# rdfs:label predicate IRI, used to resolve human-readable labels for URI nodes.
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
|
|
return (((res.get("results") or {}).get("bindings")) or [])
|
|
|
|
|
|
def _label_score(label_binding: dict[str, Any]) -> int:
|
|
# Prefer English, then no-language, then anything else.
|
|
lang = (label_binding.get("xml:lang") or "").lower()
|
|
if lang == "en":
|
|
return 3
|
|
if lang == "":
|
|
return 2
|
|
return 1
|
|
|
|
|
|
async def _fetch_rdfs_labels_for_iris(
    sparql: SparqlEngine,
    iris: list[str],
    *,
    batch_size: int = 500,
) -> dict[str, str]:
    """Resolve rdfs:label values for *iris* via batched SPARQL queries.

    Queries in chunks of *batch_size* IRIs and keeps, per subject, the
    best-scored literal label (see ``_label_score``: English first, then
    untagged, then anything else).

    Returns a mapping of IRI -> chosen label; IRIs with no label are absent.
    """
    best_by_iri: dict[str, tuple[int, str]] = {}

    start = 0
    while start < len(iris):
        chunk = iris[start : start + batch_size]
        start += batch_size
        values_clause = " ".join(f"<{iri}>" for iri in chunk)
        query = f"""
        SELECT ?s ?label
        WHERE {{
            VALUES ?s {{ {values_clause} }}
            ?s <{RDFS_LABEL}> ?label .
        }}
        """
        result = await sparql.query_json(query)
        for binding in _bindings(result):
            subject = (binding.get("s") or {}).get("value")
            term = binding.get("label") or {}
            # Only plain literal labels on a known subject are usable.
            if not subject or term.get("type") != "literal":
                continue
            value = term.get("value")
            if value is None:
                continue
            rank = _label_score(term)
            current = best_by_iri.get(subject)
            if current is None or rank > current[0]:
                best_by_iri[subject] = (rank, str(value))

    return {iri: label for iri, (_, label) in best_by_iri.items()}
def _hierarchy_edges(edges: list[dict[str, Any]]) -> list[tuple[int, int]]:
    """Invert exported rdfs:subClassOf edges for hierarchical layout.

    In the exported edges, source = subclass and target = superclass; the
    layout wants superclass -> subclass pairs. Edges whose endpoints are
    missing or non-numeric are skipped.
    """
    hier: list[tuple[int, int]] = []
    for e in edges:
        try:
            sid = int(e.get("source"))  # subclass
            tid = int(e.get("target"))  # superclass
        except (TypeError, ValueError):
            # int() only raises these; anything else should propagate.
            continue
        hier.append((tid, sid))
    return hier


def _layered_hierarchy(nodes: list[dict[str, Any]], edges: list[dict[str, Any]]) -> list[list[int]]:
    """Compute Kahn layers for the class hierarchy.

    Raises:
        CycleError: re-raised enriched with a small sample of remaining IRIs
            to aid debugging; the original traceback context is suppressed.
    """
    try:
        return level_synchronous_kahn_layers(node_count=len(nodes), edges=_hierarchy_edges(edges))
    except CycleError as e:
        sample: list[str] = []
        for nid in e.remaining_node_ids[:20]:
            try:
                sample.append(str(nodes[nid].get("iri")))
            except (IndexError, AttributeError):
                # Out-of-range id or non-dict node — best-effort sample only.
                continue
        raise CycleError(
            processed=e.processed,
            total=e.total,
            remaining_node_ids=e.remaining_node_ids,
            remaining_iri_sample=sample or None,
        ) from None


async def _attach_labels(sparql: SparqlEngine, nodes: list[dict[str, Any]]) -> None:
    """Attach rdfs:label values to URI nodes in place (blank nodes stay label-less)."""
    uri_nodes = [n for n in nodes if n.get("termType") == "uri"]
    if not uri_nodes:
        return
    iris = [str(n["iri"]) for n in uri_nodes if isinstance(n.get("iri"), str)]
    label_by_iri = await _fetch_rdfs_labels_for_iris(sparql, iris)
    for n in uri_nodes:
        iri = n.get("iri")
        if isinstance(iri, str) and iri in label_by_iri:
            n["label"] = label_by_iri[iri]


async def fetch_graph_snapshot(
    sparql: SparqlEngine,
    *,
    settings: Settings,
    node_limit: int,
    edge_limit: int,
) -> GraphResponse:
    """
    Fetch a graph snapshot (nodes + edges) via SPARQL, independent of whether the
    underlying engine is RDFLib or AnzoGraph.

    Nodes come back with precomputed radial layout positions (``x``/``y``) so
    the frontend doesn't need to run a layout, and with rdfs:label values
    attached to URI nodes.

    Raises:
        CycleError: if the subclass hierarchy contains a cycle (see
            ``_layered_hierarchy``).
    """
    edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes)
    res = await sparql.query_json(edges_q)
    # Use the module's _bindings helper rather than re-extracting inline.
    nodes, edges = graph_from_sparql_bindings(
        _bindings(res),
        node_limit=node_limit,
        include_bnodes=settings.include_bnodes,
    )

    layers = _layered_hierarchy(nodes, edges)

    # Deterministic order within each ring/layer for stable layouts.
    id_to_iri = [str(n.get("iri", "")) for n in nodes]
    for layer in layers:
        layer.sort(key=lambda nid: id_to_iri[nid])

    xs, ys = radial_positions_from_layers(node_count=len(nodes), layers=layers)
    for i, node in enumerate(nodes):
        node["x"] = float(xs[i])
        node["y"] = float(ys[i])

    await _attach_labels(sparql, nodes)

    meta = GraphResponse.Meta(
        backend=sparql.name,
        ttl_path=settings.ttl_path if settings.graph_backend == "rdflib" else None,
        sparql_endpoint=settings.effective_sparql_endpoint() if settings.graph_backend == "anzograph" else None,
        include_bnodes=settings.include_bnodes,
        node_limit=node_limit,
        edge_limit=edge_limit,
        nodes=len(nodes),
        edges=len(edges),
    )
    return GraphResponse(nodes=nodes, edges=edges, meta=meta)