from __future__ import annotations

from typing import Any

from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
from ..models import GraphResponse
from ..sparql_engine import SparqlEngine
from ..settings import Settings
from .layout_dag_radial import (
    CycleError,
    level_synchronous_kahn_layers,
    radial_positions_from_layers,
)

RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"

# Characters the SPARQL grammar forbids inside an IRIREF (`<...>`), in addition
# to every code point up to and including U+0020 (checked separately).
_IRIREF_FORBIDDEN = frozenset('<>"{}|^`\\')


def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the ``results.bindings`` list of a SPARQL JSON result.

    Missing or null ``results`` / ``bindings`` entries yield an empty list.
    """
    return (res.get("results") or {}).get("bindings") or []


def _label_score(label_binding: dict[str, Any]) -> int:
    """Rank a label term by its language tag; a higher score is preferred.

    English scores 3, a label without a language tag scores 2 and any other
    language scores 1. Regional variants such as ``en-GB`` count as English so
    that they are not ranked below untagged labels.
    """
    lang = (label_binding.get("xml:lang") or "").lower()
    if lang == "en" or lang.startswith("en-"):
        return 3
    if not lang:
        return 2
    return 1


def _is_safe_iri(iri: Any) -> bool:
    """Tell whether *iri* can be written verbatim between ``<`` and ``>``."""
    if not isinstance(iri, str) or not iri:
        return False
    return not any(ch <= " " or ch in _IRIREF_FORBIDDEN for ch in iri)


async def _fetch_rdfs_labels_for_iris(
    sparql: SparqlEngine,
    iris: list[str],
    *,
    batch_size: int = 500,
) -> dict[str, str]:
    """Look up one ``rdfs:label`` per IRI, querying the engine in batches.

    Args:
        sparql: Engine used to run the label queries (``query_json``).
        iris: Subject IRIs to look up. Duplicates are queried once; IRIs that
            contain characters not allowed in a SPARQL ``IRIREF`` are skipped.
        batch_size: Maximum number of IRIs placed in one ``VALUES`` clause.

    Returns:
        A mapping from IRI to the best-scoring literal label (see
        ``_label_score``). IRIs without a literal label are absent. When
        several labels share the best score, the first one returned wins.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Drop duplicates (keeping first-seen order) and anything that cannot be
    # embedded safely in the query text: a single malformed IRI would
    # otherwise break, or change the meaning of, the query for its whole batch.
    candidates = [u for u in dict.fromkeys(iris) if _is_safe_iri(u)]

    best: dict[str, tuple[int, str]] = {}
    for start in range(0, len(candidates), batch_size):
        batch = candidates[start : start + batch_size]
        values = " ".join(f"<{u}>" for u in batch)
        q = f"""
        SELECT ?s ?label
        WHERE {{
          VALUES ?s {{ {values} }}
          ?s <{RDFS_LABEL}> ?label .
        }}
        """
        res = await sparql.query_json(q)
        for b in _bindings(res):
            s = (b.get("s") or {}).get("value")
            label_term = b.get("label") or {}
            if not s or label_term.get("type") != "literal":
                continue
            label_value = label_term.get("value")
            if label_value is None:
                continue
            score = _label_score(label_term)
            prev = best.get(s)
            # Strict comparison: on a tie the earlier binding is kept.
            if prev is None or score > prev[0]:
                best[s] = (score, str(label_value))

    return {iri: lbl for iri, (_, lbl) in best.items()}


async def fetch_graph_snapshot(
    sparql: SparqlEngine,
    *,
    settings: Settings,
    node_limit: int,
    edge_limit: int,
) -> GraphResponse:
    """Fetch a graph snapshot (nodes + edges) via SPARQL.

    The edges are retrieved with ``edge_retrieval_query``, turned into node and
    edge records, laid out radially by topological layer, and URI nodes are
    annotated with their ``rdfs:label`` where one exists.

    Args:
        sparql: Engine used for the edge query and the label queries.
        settings: Supplies ``include_bnodes`` and the reported SPARQL endpoint.
        node_limit: Node cap passed to ``graph_from_sparql_bindings``.
        edge_limit: Edge cap passed to ``edge_retrieval_query``.

    Returns:
        A ``GraphResponse`` whose nodes carry ``x`` / ``y`` positions and,
        for labelled URI nodes, a ``label``.

    Raises:
        CycleError: If the hierarchy contains a cycle. The error is re-raised
            with a sample of up to 20 IRIs of the unprocessed nodes attached.
    """
    edges_q = edge_retrieval_query(
        edge_limit=edge_limit,
        include_bnodes=settings.include_bnodes,
    )
    res = await sparql.query_json(edges_q)
    nodes, edges = graph_from_sparql_bindings(
        _bindings(res),
        node_limit=node_limit,
        include_bnodes=settings.include_bnodes,
    )

    # Add positions so the frontend doesn't need to run a layout.
    #
    # We are exporting only rdfs:subClassOf triples. In the exported edges:
    #   source = subclass, target = superclass
    # For hierarchical layout we invert edges to:
    #   superclass -> subclass
    hier_edges: list[tuple[int, int]] = []
    for edge in edges:
        try:
            sid = int(edge.get("source"))  # subclass
            tid = int(edge.get("target"))  # superclass
        except (TypeError, ValueError, OverflowError):
            # Best effort: an edge without usable integer endpoints is left
            # out of the layout (it is still returned in the response).
            continue
        hier_edges.append((tid, sid))

    try:
        layers = level_synchronous_kahn_layers(node_count=len(nodes), edges=hier_edges)
    except CycleError as err:
        # Add a small URI sample to aid debugging.
        sample: list[str] = []
        for nid in err.remaining_node_ids[:20]:
            try:
                sample.append(str(nodes[nid].get("iri")))
            except (IndexError, KeyError, TypeError, AttributeError):
                continue
        raise CycleError(
            processed=err.processed,
            total=err.total,
            remaining_node_ids=err.remaining_node_ids,
            remaining_iri_sample=sample or None,
        ) from None

    # Deterministic order within each ring/layer for stable layouts.
    id_to_iri = [str(n.get("iri", "")) for n in nodes]
    for layer in layers:
        layer.sort(key=lambda nid: id_to_iri[nid])

    xs, ys = radial_positions_from_layers(node_count=len(nodes), layers=layers)
    for i, node in enumerate(nodes):
        node["x"] = float(xs[i])
        node["y"] = float(ys[i])

    # Attach labels for URI nodes (blank nodes remain label-less).
    uri_nodes = [n for n in nodes if n.get("termType") == "uri"]
    if uri_nodes:
        iris = [n["iri"] for n in uri_nodes if isinstance(n.get("iri"), str)]
        label_by_iri = await _fetch_rdfs_labels_for_iris(sparql, iris)
        for n in uri_nodes:
            iri = n.get("iri")
            if isinstance(iri, str) and iri in label_by_iri:
                n["label"] = label_by_iri[iri]

    meta = GraphResponse.Meta(
        backend=sparql.name,
        ttl_path=None,
        sparql_endpoint=settings.effective_sparql_endpoint(),
        include_bnodes=settings.include_bnodes,
        node_limit=node_limit,
        edge_limit=edge_limit,
        nodes=len(nodes),
        edges=len(edges),
    )
    return GraphResponse(nodes=nodes, edges=edges, meta=meta)