diff --git a/.env.example b/.env.example deleted file mode 100644 index 61e4400..0000000 --- a/.env.example +++ /dev/null @@ -1,30 +0,0 @@ -# Choose which engine executes SPARQL: -# - rdflib: parse TTL locally and query in-memory -# - anzograph: query AnzoGraph over HTTP (optionally LOAD the TTL on startup) -GRAPH_BACKEND=rdflib - -# Backend (rdflib) file location inside the container. -# The TTL file must exist within the mounted ./data folder if you keep the default volume mount. -TTL_PATH=/data/o3po.ttl - -# Backend behavior -INCLUDE_BNODES=false -# MAX_TRIPLES=1000000 - -# AnzoGraph / SPARQL endpoint settings (used when GRAPH_BACKEND=anzograph) -SPARQL_HOST=http://anzograph:8080 -# SPARQL_ENDPOINT=http://anzograph:8080/sparql -SPARQL_USER=admin -SPARQL_PASS=Passw0rd1 - -# File URI as seen by the AnzoGraph container (used by SPARQL `LOAD`) -SPARQL_DATA_FILE=file:///opt/shared-files/o3po.ttl -# SPARQL_GRAPH_IRI=http://example.org/graph - -# Startup behavior for AnzoGraph mode -SPARQL_LOAD_ON_START=false -SPARQL_CLEAR_ON_START=false - -# Dev UX -CORS_ORIGINS=http://localhost:5173 -VITE_BACKEND_URL=http://backend:8000 diff --git a/backend/app/README.md b/backend/app/README.md index 2f90e9c..3f4afec 100644 --- a/backend/app/README.md +++ b/backend/app/README.md @@ -32,6 +32,11 @@ Callers (frontend or other clients) interact with a single API surface (`/api/*` - Used by `/api/nodes`, `/api/edges`, and `rdflib`-mode `/api/stats`. - `pipelines/graph_snapshot.py` - Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL (works for both RDFLib and AnzoGraph). +- `pipelines/layout_dag_radial.py` + - DAG layout helpers used by `pipelines/graph_snapshot.py`: + - cycle detection + - level-synchronous Kahn layering + - radial (ring-per-layer) positioning. - `pipelines/snapshot_service.py` - Snapshot cache layer used by `/api/graph` and `/api/stats` so the backend doesn't run expensive SPARQL twice. 
- `pipelines/subclass_labels.py` @@ -64,6 +69,14 @@ RDFLib mode: - `TTL_PATH`: path inside the backend container to a `.ttl` file (example: `/data/o3po.ttl`) - `MAX_TRIPLES`: optional int; if set, stops parsing after this many triples +Optional import-combining step (runs before the SPARQL engine starts): + +- `COMBINE_OWL_IMPORTS_ON_START`: `true` to recursively load `TTL_PATH` (or `COMBINE_ENTRY_LOCATION`) plus `owl:imports` and write a combined TTL file. +- `COMBINE_ENTRY_LOCATION`: optional override for the entry file/URL to load (defaults to `TTL_PATH`) +- `COMBINE_OUTPUT_LOCATION`: optional explicit output path (defaults to `${dirname(entry)}/${COMBINE_OUTPUT_NAME}`) +- `COMBINE_OUTPUT_NAME`: output filename when `COMBINE_OUTPUT_LOCATION` is not set (default: `combined_ontology.ttl`) +- `COMBINE_FORCE`: `true` to rebuild even if the output file already exists + AnzoGraph mode: - `SPARQL_HOST`: base host (example: `http://anzograph:8080`) @@ -129,8 +142,8 @@ Returned in `nodes[]` (dense IDs; suitable for indexing in typed arrays): - `id`: integer dense node ID used in edges - `termType`: `"uri"` or `"bnode"` - `iri`: URI string; blank nodes are normalized to `_:` -- `label`: currently `null` in `/api/graph` snapshots (pipelines can be used to populate later) -- `x`/`y`: world-space coordinates for rendering (currently a deterministic spiral layout) +- `label`: `rdfs:label` when available (best-effort; prefers English) +- `x`/`y`: world-space coordinates for rendering (currently a radial layered layout derived from `rdfs:subClassOf`) ### Edge @@ -149,11 +162,10 @@ Returned in `edges[]`: ## Snapshot Query (`/api/graph`) -`/api/graph` uses a SPARQL query that: +`/api/graph` currently uses a SPARQL query that returns only `rdfs:subClassOf` edges: -- selects triples `?s ?p ?o` -- excludes literal objects (`FILTER(!isLiteral(?o))`) -- excludes `rdfs:label`, `skos:prefLabel`, and `skos:altLabel` predicates +- selects bindings as `?s ?p ?o` (with `?p` bound to 
def edge_retrieval_query(*, edge_limit: int, include_bnodes: bool) -> str:
    """
    Build the snapshot SPARQL query.

    Returns a SELECT over `?s ?p ?o` restricted to:
      - `rdf:type` edges whose object is itself an `owl:Class`, and
      - `rdfs:subClassOf` edges,
    excluding literal objects, optionally excluding blank nodes, and capped
    at `edge_limit` result rows.
    """
    bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
    # NOTE(review): the PREFIX namespace IRIs were missing in the mangled
    # source (angle-bracket text stripped during extraction); restored to the
    # standard RDF / RDFS / OWL namespaces.
    return f"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?s ?p ?o
WHERE {{
  {{
    VALUES ?p {{ rdf:type }}
    ?s ?p ?o .
    ?o rdf:type owl:Class .
  }}
  UNION
  {{
    VALUES ?p {{ rdfs:subClassOf }}
    ?s ?p ?o .
  }}
  FILTER(!isLiteral(?o))
  {bnode_filter}
}}
LIMIT {edge_limit}
"""
rdflib_preloaded_graph = await asyncio.to_thread(build_combined_graph, entry_location) + logger.info("Finished combining imports; serializing to: %s", output_location) + await asyncio.to_thread(serialize_graph_to_ttl, rdflib_preloaded_graph, output_location) + + if settings.graph_backend == "rdflib": + settings.ttl_path = str(output_path) + + sparql: SparqlEngine = create_sparql_engine(settings, rdflib_graph=rdflib_preloaded_graph) await sparql.startup() app.state.sparql = sparql app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings) @@ -62,7 +102,10 @@ def health() -> dict[str, str]: async def stats() -> StatsResponse: # Stats reflect exactly what we send to the frontend (/api/graph), not global graph size. svc: GraphSnapshotService = app.state.snapshot_service - snap = await svc.get(node_limit=50_000, edge_limit=100_000) + try: + snap = await svc.get(node_limit=50_000, edge_limit=100_000) + except CycleError as e: + raise HTTPException(status_code=422, detail=str(e)) from None meta = snap.meta return StatsResponse( backend=meta.backend if meta else app.state.sparql.name, @@ -81,6 +124,20 @@ async def sparql_query(req: SparqlQueryRequest) -> dict: return data +@app.post("/api/neighbors", response_model=NeighborsResponse) +async def neighbors(req: NeighborsRequest) -> NeighborsResponse: + svc: GraphSnapshotService = app.state.snapshot_service + snap = await svc.get(node_limit=req.node_limit, edge_limit=req.edge_limit) + sparql: SparqlEngine = app.state.sparql + neighbor_ids = await fetch_neighbor_ids_for_selection( + sparql, + snapshot=snap, + selected_ids=req.selected_ids, + include_bnodes=settings.include_bnodes, + ) + return NeighborsResponse(selected_ids=req.selected_ids, neighbor_ids=neighbor_ids) + + @app.get("/api/nodes", response_model=NodesResponse) def nodes( limit: int = Query(default=10_000, ge=1, le=200_000), @@ -109,4 +166,7 @@ async def graph( edge_limit: int = Query(default=100_000, ge=1, le=500_000), ) -> GraphResponse: 
svc: GraphSnapshotService = app.state.snapshot_service - return await svc.get(node_limit=node_limit, edge_limit=edge_limit) + try: + return await svc.get(node_limit=node_limit, edge_limit=edge_limit) + except CycleError as e: + raise HTTPException(status_code=422, detail=str(e)) from None diff --git a/backend/app/models.py b/backend/app/models.py index fcf3af2..8b662a7 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -56,3 +56,14 @@ class GraphResponse(BaseModel): class SparqlQueryRequest(BaseModel): query: str + + +class NeighborsRequest(BaseModel): + selected_ids: list[int] + node_limit: int = 50_000 + edge_limit: int = 100_000 + + +class NeighborsResponse(BaseModel): + selected_ids: list[int] + neighbor_ids: list[int] diff --git a/backend/app/pipelines/graph_snapshot.py b/backend/app/pipelines/graph_snapshot.py index 4b6529f..f8211be 100644 --- a/backend/app/pipelines/graph_snapshot.py +++ b/backend/app/pipelines/graph_snapshot.py @@ -1,10 +1,64 @@ from __future__ import annotations +from typing import Any + from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings from ..models import GraphResponse from ..sparql_engine import SparqlEngine from ..settings import Settings -from .layout_spiral import spiral_positions +from .layout_dag_radial import CycleError, level_synchronous_kahn_layers, radial_positions_from_layers + + +RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label" + + +def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]: + return (((res.get("results") or {}).get("bindings")) or []) + + +def _label_score(label_binding: dict[str, Any]) -> int: + # Prefer English, then no-language, then anything else. 
+ lang = (label_binding.get("xml:lang") or "").lower() + if lang == "en": + return 3 + if lang == "": + return 2 + return 1 + + +async def _fetch_rdfs_labels_for_iris( + sparql: SparqlEngine, + iris: list[str], + *, + batch_size: int = 500, +) -> dict[str, str]: + best: dict[str, tuple[int, str]] = {} + + for i in range(0, len(iris), batch_size): + batch = iris[i : i + batch_size] + values = " ".join(f"<{u}>" for u in batch) + q = f""" +SELECT ?s ?label +WHERE {{ + VALUES ?s {{ {values} }} + ?s <{RDFS_LABEL}> ?label . +}} +""" + res = await sparql.query_json(q) + for b in _bindings(res): + s = (b.get("s") or {}).get("value") + label_term = b.get("label") or {} + if not s or label_term.get("type") != "literal": + continue + label_value = label_term.get("value") + if label_value is None: + continue + score = _label_score(label_term) + prev = best.get(s) + if prev is None or score > prev[0]: + best[s] = (score, str(label_value)) + + return {iri: lbl for iri, (_, lbl) in best.items()} async def fetch_graph_snapshot( @@ -28,11 +82,59 @@ async def fetch_graph_snapshot( ) # Add positions so the frontend doesn't need to run a layout. - xs, ys = spiral_positions(len(nodes)) + # + # We are exporting only rdfs:subClassOf triples. In the exported edges: + # source = subclass, target = superclass + # For hierarchical layout we invert edges to: + # superclass -> subclass + hier_edges: list[tuple[int, int]] = [] + for e in edges: + s = e.get("source") + t = e.get("target") + try: + sid = int(s) # subclass + tid = int(t) # superclass + except Exception: + continue + hier_edges.append((tid, sid)) + + try: + layers = level_synchronous_kahn_layers(node_count=len(nodes), edges=hier_edges) + except CycleError as e: + # Add a small URI sample to aid debugging. 
class CycleError(RuntimeError):
    """
    Raised when the requested layout requires a DAG, but a cycle is detected.

    `remaining_node_ids` are the node ids that still had indegree > 0 after Kahn.
    """

    def __init__(
        self,
        *,
        processed: int,
        total: int,
        remaining_node_ids: list[int],
        remaining_iri_sample: list[str] | None = None,
    ) -> None:
        self.processed = int(processed)
        self.total = int(total)
        self.remaining_node_ids = remaining_node_ids
        self.remaining_iri_sample = remaining_iri_sample

        msg = f"Cycle detected in subClassOf graph (processed {self.processed}/{self.total} nodes)."
        if remaining_iri_sample:
            msg += f" Example nodes: {', '.join(remaining_iri_sample)}"
        super().__init__(msg)


def level_synchronous_kahn_layers(
    *,
    node_count: int,
    edges: Iterable[tuple[int, int]],
) -> list[list[int]]:
    """
    Kahn's topological sort, batched level-by-level.

    Each layer is the full set of currently indegree-0 nodes; nodes unlocked
    while a layer is being consumed only join the *next* layer. `edges` are
    directed `(u, v)` pairs; self-loops and out-of-range endpoints are
    ignored. Raises `CycleError` if a cycle prevents processing every node.
    """
    n = int(node_count)
    if n <= 0:
        return []

    adjacency: list[list[int]] = [[] for _ in range(n)]
    indegree = [0] * n

    for u, v in edges:
        if u == v:
            # Self-loops don't help layout and would trivially violate DAG-ness.
            continue
        if not (0 <= u < n and 0 <= v < n):
            continue
        adjacency[u].append(v)
        indegree[v] += 1

    frontier: deque[int] = deque(i for i, d in enumerate(indegree) if d == 0)
    layers: list[list[int]] = []
    processed = 0

    while frontier:
        # Consume the full current queue as a single layer.
        current = list(frontier)
        frontier.clear()
        layers.append(current)

        for u in current:
            processed += 1
            for v in adjacency[u]:
                indegree[v] -= 1
                if indegree[v] == 0:
                    frontier.append(v)

    if processed != n:
        stuck = [i for i, d in enumerate(indegree) if d > 0]
        raise CycleError(processed=processed, total=n, remaining_node_ids=stuck)

    return layers


def radial_positions_from_layers(
    *,
    node_count: int,
    layers: Sequence[Sequence[int]],
    max_r: float = 5000.0,
) -> tuple[list[float], list[float]]:
    """
    Place nodes on concentric rings, one ring per layer.

    Radius grows with the layer index (bounded by `max_r`); nodes within a
    ring are evenly spaced by angle, and each ring is rotated by a
    golden-angle offset so radial spokes don't align across rings.
    Node ids outside `[0, node_count)` are silently skipped.
    """
    n = int(node_count)
    if n <= 0:
        return ([], [])

    xs = [0.0] * n
    ys = [0.0] * n
    if not layers:
        return (xs, ys)

    two_pi = 2.0 * math.pi
    golden_angle = math.pi * (3.0 - math.sqrt(5.0))
    denom = float(len(layers) + 1)

    for li, ring in enumerate(layers):
        m = len(ring)
        if m <= 0:
            continue

        # Keep everything within ~[-max_r, max_r] like the previous spiral layout.
        radius = ((li + 1) / denom) * max_r
        # Rotate each layer deterministically to avoid radial spokes aligning.
        offset = (li * golden_angle) % two_pi

        if m == 1:
            nid = int(ring[0])
            if 0 <= nid < n:
                xs[nid] = radius * math.cos(offset)
                ys[nid] = radius * math.sin(offset)
            continue

        step = two_pi / float(m)
        for j, raw_id in enumerate(ring):
            nid = int(raw_id)
            if not (0 <= nid < n):
                continue
            angle = offset + step * float(j)
            xs[nid] = radius * math.cos(angle)
            ys[nid] = radius * math.sin(angle)

    return (xs, ys)
logger = logging.getLogger(__name__)


def _is_http_url(location: str) -> bool:
    """True when `location` is an http(s) URL."""
    return urlparse(location).scheme.lower() in {"http", "https"}


def _is_file_uri(location: str) -> bool:
    """True when `location` is a file:// URI."""
    return urlparse(location).scheme.lower() == "file"


def _file_uri_to_path(location: str) -> Path:
    """Convert a file:// URI to a local, percent-decoded Path; raises ValueError otherwise."""
    parsed = urlparse(location)
    if parsed.scheme.lower() != "file":
        raise ValueError(f"Not a file:// URI: {location!r}")
    return Path(unquote(parsed.path))


def resolve_output_location(
    entry_location: str,
    *,
    output_location: str | None,
    output_name: str,
) -> str:
    """
    Decide where the combined TTL should be written.

    An explicit `output_location` always wins. Otherwise `output_name` is
    placed next to the entry file. http(s) entries have no local directory,
    so they require an explicit output location.
    """
    if output_location:
        return output_location

    if _is_http_url(entry_location):
        raise ValueError(
            "COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
        )

    entry_path = _file_uri_to_path(entry_location) if _is_file_uri(entry_location) else Path(entry_location)
    return str(entry_path.parent / output_name)


def _output_destination_to_path(output_location: str) -> Path:
    """Normalize an output location (plain path or file:// URI) to a local Path."""
    if _is_file_uri(output_location):
        return _file_uri_to_path(output_location)
    if _is_http_url(output_location):
        raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
    return Path(output_location)


def output_location_to_path(output_location: str) -> Path:
    """Public wrapper around `_output_destination_to_path`."""
    return _output_destination_to_path(output_location)


def build_combined_graph(entry_location: str) -> Graph:
    """
    Recursively loads an RDF document (file path, file:// URI, or http(s) URL) and its
    owl:imports into a single in-memory graph.

    Loads are best-effort: failures are logged and skipped, and every
    location is visited at most once (guards against import cycles).
    """
    combined_graph = Graph()
    visited_locations: set[str] = set()

    def resolve_imports(location: str) -> None:
        if location in visited_locations:
            return
        visited_locations.add(location)

        logger.info("Loading ontology: %s", location)
        try:
            combined_graph.parse(location=location)
        except Exception as e:
            logger.warning("Failed to load %s (%s)", location, e)
            return

        # Snapshot the owl:imports accumulated so far, then recurse into new ones.
        imports = [str(o) for _, _, o in combined_graph.triples((None, OWL.imports, None))]
        for imported_location in imports:
            if imported_location not in visited_locations:
                resolve_imports(imported_location)

    resolve_imports(entry_location)
    return combined_graph


def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
    """Serialize `graph` as Turtle, writing atomically via a .tmp file + rename."""
    output_path = _output_destination_to_path(output_location)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
    graph.serialize(destination=str(tmp_path), format="turtle")
    os.replace(str(tmp_path), str(output_path))
+ return "SELECT ?nbr WHERE { FILTER(false) }" + + bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?nbr))" + values = " ".join(values_terms) + + # Neighbors are defined as any node directly connected by rdf:type (to owl:Class) + # or rdfs:subClassOf, in either direction (treating edges as undirected). + return f""" +PREFIX rdf: +PREFIX rdfs: +PREFIX owl: + +SELECT DISTINCT ?nbr +WHERE {{ + VALUES ?sel {{ {values} }} + {{ + ?sel rdf:type ?o . + ?o rdf:type owl:Class . + BIND(?o AS ?nbr) + }} + UNION + {{ + ?s rdf:type ?sel . + ?sel rdf:type owl:Class . + BIND(?s AS ?nbr) + }} + UNION + {{ + ?sel rdfs:subClassOf ?o . + BIND(?o AS ?nbr) + }} + UNION + {{ + ?s rdfs:subClassOf ?sel . + BIND(?s AS ?nbr) + }} + FILTER(!isLiteral(?nbr)) + FILTER(?nbr != ?sel) + {bnode_filter} +}} +""" + + +def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]: + return (((res.get("results") or {}).get("bindings")) or []) + + +def _term_key(term: dict[str, Any], *, include_bnodes: bool) -> tuple[str, str] | None: + t = term.get("type") + v = term.get("value") + if not t or v is None: + return None + if t == "literal": + return None + if t == "bnode": + if not include_bnodes: + return None + return ("bnode", f"_:{v}") + return ("uri", str(v)) + + +async def fetch_neighbor_ids_for_selection( + sparql: SparqlEngine, + *, + snapshot: GraphResponse, + selected_ids: list[int], + include_bnodes: bool, +) -> list[int]: + id_to_node: dict[int, Node] = {n.id: n for n in snapshot.nodes} + + selected_nodes: list[Node] = [] + selected_id_set: set[int] = set() + for nid in selected_ids: + if not isinstance(nid, int): + continue + n = id_to_node.get(nid) + if n is None: + continue + if n.termType == "bnode" and not include_bnodes: + continue + selected_nodes.append(n) + selected_id_set.add(nid) + + if not selected_nodes: + return [] + + key_to_id: dict[tuple[str, str], int] = {} + for n in snapshot.nodes: + key_to_id[(n.termType, n.iri)] = n.id + + q = 
selection_neighbors_query(selected_nodes=selected_nodes, include_bnodes=include_bnodes) + res = await sparql.query_json(q) + + neighbor_ids: set[int] = set() + for b in _bindings(res): + nbr_term = b.get("nbr") or {} + key = _term_key(nbr_term, include_bnodes=include_bnodes) + if key is None: + continue + nid = key_to_id.get(key) + if nid is None: + continue + if nid in selected_id_set: + continue + neighbor_ids.add(nid) + + # Stable ordering for consistent frontend behavior. + return sorted(neighbor_ids) diff --git a/backend/app/settings.py b/backend/app/settings.py index 4288572..9e49b18 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -16,6 +16,13 @@ class Settings(BaseSettings): include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES") max_triples: int | None = Field(default=None, alias="MAX_TRIPLES") + # Optional: Combine owl:imports into a single TTL file on backend startup. + combine_owl_imports_on_start: bool = Field(default=False, alias="COMBINE_OWL_IMPORTS_ON_START") + combine_entry_location: str | None = Field(default=None, alias="COMBINE_ENTRY_LOCATION") + combine_output_location: str | None = Field(default=None, alias="COMBINE_OUTPUT_LOCATION") + combine_output_name: str = Field(default="combined_ontology.ttl", alias="COMBINE_OUTPUT_NAME") + combine_force: bool = Field(default=False, alias="COMBINE_FORCE") + # AnzoGraph / SPARQL endpoint configuration sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST") # If not set, the backend uses `${SPARQL_HOST}/sparql`. 
diff --git a/backend/app/sparql_engine.py b/backend/app/sparql_engine.py index 9f41ac3..75a3559 100644 --- a/backend/app/sparql_engine.py +++ b/backend/app/sparql_engine.py @@ -24,11 +24,13 @@ class SparqlEngine(Protocol): class RdflibEngine: name = "rdflib" - def __init__(self, *, ttl_path: str): + def __init__(self, *, ttl_path: str, graph: Graph | None = None): self.ttl_path = ttl_path - self.graph: Graph | None = None + self.graph: Graph | None = graph async def startup(self) -> None: + if self.graph is not None: + return g = Graph() g.parse(self.ttl_path, format="turtle") self.graph = g @@ -167,9 +169,9 @@ class AnzoGraphEngine: raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err -def create_sparql_engine(settings: Settings) -> SparqlEngine: +def create_sparql_engine(settings: Settings, *, rdflib_graph: Graph | None = None) -> SparqlEngine: if settings.graph_backend == "rdflib": - return RdflibEngine(ttl_path=settings.ttl_path) + return RdflibEngine(ttl_path=settings.ttl_path, graph=rdflib_graph) if settings.graph_backend == "anzograph": return AnzoGraphEngine(settings=settings) raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}") diff --git a/docker-compose.yml b/docker-compose.yml index 85ce165..28af26b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,9 +21,14 @@ services: - SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30} - SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4} - SPARQL_READY_TIMEOUT_S=${SPARQL_READY_TIMEOUT_S:-10} + - COMBINE_OWL_IMPORTS_ON_START=${COMBINE_OWL_IMPORTS_ON_START:-false} + - COMBINE_ENTRY_LOCATION + - COMBINE_OUTPUT_LOCATION + - COMBINE_OUTPUT_NAME + - COMBINE_FORCE=${COMBINE_FORCE:-false} volumes: - ./backend:/app - - ./data:/data:ro + - ./data:/data:Z command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload healthcheck: test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health').read()"] diff --git 
a/docs/anzograph-readiness-julia.md b/docs/anzograph-readiness-julia.md deleted file mode 100644 index e5d5bbb..0000000 --- a/docs/anzograph-readiness-julia.md +++ /dev/null @@ -1,371 +0,0 @@ -# Waiting for AnzoGraph readiness from Julia (how this repo does it) - -This repo runs a Julia pipeline (`julia/main.jl`) against an AnzoGraph SPARQL endpoint. The key problem is that **“container started” ≠ “SPARQL endpoint is ready to accept queries”**. - -So, before the Julia code does anything that depends on SPARQL (like `LOAD <...>` or large `SELECT`s), it explicitly **waits until AnzoGraph is actually responding to a real SPARQL POST request with valid JSON results**. - -This document explains the exact mechanism used here, why it works, and gives copy/paste-ready patterns you can transfer to another project. - ---- - -## 1) Where the waiting happens (pipeline control flow) - -In `julia/main.jl`, the entrypoint calls: - -```julia -# Step 1: Wait for AnzoGraph -wait_for_anzograph() - -# Step 2: Load TTL file -result = sparql_update("LOAD <$SPARQL_DATA_FILE>") -``` - -So the “await” is not a Julia `Task`/`async` wait; it is a **blocking retry loop** that only returns when it can successfully execute a small SPARQL query. - -Reference: `julia/main.jl` defines `wait_for_anzograph()` and calls it from `main()`. - ---- - -## 2) Why this is needed even with Docker Compose `depends_on` - -This repo’s `docker-compose.yml` includes an AnzoGraph `healthcheck`: - -```yaml -anzograph: - healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:8080/sparql || exit 1"] - interval: 10s - timeout: 5s - retries: 30 - start_period: 60s -``` - -However, `julia-layout` currently depends on `anzograph` with: - -```yaml -depends_on: - anzograph: - condition: service_started -``` - -Meaning: -- Compose will ensure the **container process has started**. 
-- Compose does **not** guarantee the AnzoGraph HTTP/SPARQL endpoint is ready (unless you use `service_healthy`, and even then a “healthy GET” is not always equivalent to “SPARQL POST works with auth + JSON”). - -So the Julia code includes its own readiness gate to prevent failures like: -- TCP connection refused (port not open yet) -- HTTP endpoint reachable but not fully initialized -- Non-JSON/HTML error responses while the service is still booting - ---- - -## 3) What “ready” means in this repo - -In this repo, “AnzoGraph is ready” means: - -1. An HTTP `POST` to `${SPARQL_HOST}/sparql` succeeds, with headers: - - `Content-Type: application/x-www-form-urlencoded` - - `Accept: application/sparql-results+json` - - `Authorization: Basic ...` -2. The body parses as SPARQL JSON results (`application/sparql-results+json`) - -It does **not** strictly mean: -- Your dataset is already loaded -- The loaded data is fully indexed (that can matter in some systems after `LOAD`) - -This repo uses readiness as a **“SPARQL endpoint is alive and speaking the protocol”** check. 
- ---- - -## 4) The actual Julia implementation (as in `julia/main.jl`) - -### 4.1 Configuration (endpoint + auth) - -The Julia script builds endpoint and auth from environment variables: - -```julia -const SPARQL_HOST = get(ENV, "SPARQL_HOST", "http://localhost:8080") -const SPARQL_ENDPOINT = "$SPARQL_HOST/sparql" -const SPARQL_USER = get(ENV, "SPARQL_USER", "admin") -const SPARQL_PASS = get(ENV, "SPARQL_PASS", "Passw0rd1") -const AUTH_HEADER = "Basic " * base64encode("$SPARQL_USER:$SPARQL_PASS") -``` - -In Docker Compose for this repo, the Julia container overrides `SPARQL_HOST` to use the service DNS name: - -```yaml -environment: - - SPARQL_HOST=http://anzograph:8080 -``` - -### 4.2 The smoke query used for readiness - -This is the query used in the wait loop: - -```julia -const SMOKE_TEST_QUERY = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 3" -``` - -Notes: -- It’s intentionally small (`LIMIT 3`) to keep the readiness check cheap. -- It returns *some* bindings when data exists, but **even an empty dataset can still return a valid empty result set**. The code treats “valid response” as ready. 
- -If you want a readiness check that does not depend on any data being present, an `ASK` query is also common: - -```sparql -ASK WHERE { ?s ?p ?o } -``` - -### 4.3 SPARQL query function (request + minimal retry) - -`sparql_query(query; retries=...)` is a generic helper that makes SPARQL POST requests: - -```julia -function sparql_query(query::String; retries::Int=5)::SparqlResult - for attempt in 1:retries - try - response = HTTP.post( - SPARQL_ENDPOINT, - [ - "Content-Type" => "application/x-www-form-urlencoded", - "Accept" => "application/sparql-results+json", - "Authorization" => AUTH_HEADER - ]; - body = "query=" * HTTP.URIs.escapeuri(query) - ) - - if response.status == 200 - json = JSON.parse(String(response.body)) - return SparqlResult(json["results"]["bindings"]) - elseif response.status >= 500 && attempt < retries - sleep(10) - continue - else - error("SPARQL query failed with status $(response.status)") - end - catch e - if attempt < retries - sleep(10) - continue - end - rethrow(e) - end - end - error("SPARQL query failed after $retries attempts") -end -``` - -Important behaviors to preserve when transferring: -- It uses **POST** (not GET) to the SPARQL endpoint. -- It requires a **200** response and successfully parses SPARQL JSON results. 
-- It retries on: - - `>= 500` server errors - - network / protocol / parsing errors (caught exceptions) - -### 4.4 The readiness gate: `wait_for_anzograph` - -This is the “await until ready” logic: - -```julia -function wait_for_anzograph(max_retries::Int=30)::Bool - println("Waiting for AnzoGraph at $SPARQL_ENDPOINT...") - - for attempt in 1:max_retries - try - smoke_result = sparql_query(SMOKE_TEST_QUERY; retries=1) - println(" AnzoGraph is ready (attempt $attempt, smoke rows=$(length(smoke_result.bindings)))") - return true - catch e - println(" Attempt $attempt/$max_retries: $(typeof(e))") - sleep(4) - end - end - - error("AnzoGraph not available after $max_retries attempts") -end -``` - -Why it calls `sparql_query(...; retries=1)`: -- It makes each outer “readiness attempt” a **single** request. -- The outer loop controls cadence (`sleep(4)`) and total wait time. -- This avoids “nested retry loops” (inner sleeps + outer sleeps) that can make waits much longer than intended. - -Time bound in the current implementation: -- `max_retries = 30` -- `sleep(4)` between attempts -- Roughly ~120 seconds of waiting (plus request time). - ---- - -## 5) What failures cause it to keep waiting - -`wait_for_anzograph()` catches any exception thrown by `sparql_query()` and retries. In practice, that includes: - -- **Connection errors** (DNS not ready, connection refused, etc.) -- **Timeouts** (if HTTP request takes too long and the library throws) -- **Non-200 HTTP statuses** that cause `error(...)` -- **Non-JSON / unexpected JSON** responses causing `JSON.parse(...)` to throw - -That last point is a big reason a “real SPARQL request + parse” is stronger than just “ping the port”. 
- ---- - -## 6) Transferable, self-contained version (recommended pattern) - -If you want to reuse this in another project, it’s usually easier to: -- avoid globals, -- make endpoint/auth explicit, -- use a **time-based timeout** instead of `max_retries` (more robust), -- add request timeouts so the wait loop can’t hang forever on a single request. - -Below is a drop-in module you can copy into your project. - -```julia -module AnzoGraphReady - -using HTTP -using JSON -using Base64 -using Dates - -struct SparqlResult - bindings::Vector{Dict{String, Any}} -end - -function basic_auth_header(user::AbstractString, pass::AbstractString)::String - return "Basic " * base64encode("$(user):$(pass)") -end - -function sparql_query( - endpoint::AbstractString, - auth_header::AbstractString, - query::AbstractString; - retries::Int = 1, - retry_sleep_s::Real = 2, - request_timeout_s::Real = 15, -)::SparqlResult - for attempt in 1:retries - try - response = HTTP.post( - String(endpoint), - [ - "Content-Type" => "application/x-www-form-urlencoded", - "Accept" => "application/sparql-results+json", - "Authorization" => auth_header, - ]; - body = "query=" * HTTP.URIs.escapeuri(String(query)), - readtimeout = request_timeout_s, - ) - - if response.status != 200 - error("SPARQL query failed with status $(response.status)") - end - - parsed = JSON.parse(String(response.body)) - bindings = get(get(parsed, "results", Dict()), "bindings", Any[]) - return SparqlResult(Vector{Dict{String, Any}}(bindings)) - catch e - if attempt < retries - sleep(retry_sleep_s) - continue - end - rethrow(e) - end - end - error("sparql_query: unreachable") -end - -""" -Wait until AnzoGraph responds to a real SPARQL POST with parseable JSON. 
- -This is the direct analog of this repo's `wait_for_anzograph()`, but with: -- a time-based timeout (`timeout`) -- a request timeout per attempt (`request_timeout_s`) -- simple exponential backoff -""" -function wait_for_anzograph( - endpoint::AbstractString, - auth_header::AbstractString; - timeout::Period = Minute(3), - initial_delay_s::Real = 0.5, - max_delay_s::Real = 5.0, - request_timeout_s::Real = 10.0, - query::AbstractString = "ASK WHERE { ?s ?p ?o }", -)::Nothing - deadline = now() + timeout - delay_s = initial_delay_s - - while now() < deadline - try - # A single attempt: if it succeeds, we declare "ready". - sparql_query( - endpoint, - auth_header, - query; - retries = 1, - request_timeout_s = request_timeout_s, - ) - return - catch - sleep(delay_s) - delay_s = min(max_delay_s, delay_s * 1.5) - end - end - - error("AnzoGraph not available before timeout=$(timeout)") -end - -end # module -``` - -Typical usage (matching this repo’s environment variables): - -```julia -using .AnzoGraphReady - -sparql_host = get(ENV, "SPARQL_HOST", "http://localhost:8080") -endpoint = "$(sparql_host)/sparql" -user = get(ENV, "SPARQL_USER", "admin") -pass = get(ENV, "SPARQL_PASS", "Passw0rd1") - -auth = AnzoGraphReady.basic_auth_header(user, pass) -AnzoGraphReady.wait_for_anzograph(endpoint, auth; timeout=Minute(5)) - -# Now it is safe to LOAD / query. -``` - ---- - -## 7) Optional: waiting for “data is ready” after `LOAD` - -Some systems accept `LOAD` but need time before results show up reliably (indexing / transaction visibility). -If you run into that in your other project, add a second gate after `LOAD`, for example: - -1) load, then -2) poll a query that must be true after load (e.g., “triple count > 0”, or a known IRI exists). 
- -Example “post-load gate”: - -```julia -post_load_query = """ -SELECT (COUNT(*) AS ?n) -WHERE { ?s ?p ?o } -""" - -res = AnzoGraphReady.sparql_query(endpoint, auth, post_load_query; retries=1) -# Parse `?n` out of bindings and require it to be > 0; retry until it is. -``` - -(This repo does not currently enforce “non-empty”; it only enforces “SPARQL is working”.) - ---- - -## 8) Practical checklist when transferring to another project - -- Make readiness checks hit the **real SPARQL POST** path you will use in production. -- Require a **valid JSON parse**, not just “port open”. -- Add **per-request timeouts**, so a single hung request cannot hang the whole pipeline. -- Prefer **time-based overall timeout** for predictable behavior in CI. -- Keep the query **cheap** (`ASK` or `LIMIT 1/3`). -- If you use Docker Compose healthchecks, consider also using `depends_on: condition: service_healthy`, but still keep the in-app wait as a safety net (it’s closer to the real contract your code needs). 
- diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index f22b183..5414f2d 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -5,6 +5,17 @@ function sleep(ms: number): Promise<void> { return new Promise((r) => setTimeout(r, ms)); } +type GraphMeta = { + backend?: string; + ttl_path?: string | null; + sparql_endpoint?: string | null; + include_bnodes?: boolean; + node_limit?: number; + edge_limit?: number; + nodes?: number; + edges?: number; +}; + export default function App() { const canvasRef = useRef(null); const rendererRef = useRef(null); @@ -18,12 +29,15 @@ export default function App() { ptSize: 0, }); const [error, setError] = useState(""); - const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number } | null>(null); + const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number; label?: string; iri?: string } | null>(null); const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set()); const [backendStats, setBackendStats] = useState<{ nodes: number; edges: number; backend?: string } | null>(null); + const graphMetaRef = useRef<GraphMeta | null>(null); + const neighborsReqIdRef = useRef(0); // Store mouse position in a ref so it can be accessed in render loop without re-renders const mousePos = useRef({ x: 0, y: 0 }); + const nodesRef = useRef([]); useEffect(() => { const canvas = canvasRef.current; @@ -70,6 +84,9 @@ const meta = graph.meta || null; const count = nodes.length; + nodesRef.current = nodes; + graphMetaRef.current = meta && typeof meta === "object" ? (meta as GraphMeta) : null; + // Build positions from backend-provided node coordinates. 
setStatus("Preparing buffers…"); const xs = new Float32Array(count); @@ -196,9 +213,18 @@ export default function App() { frameCount++; // Find hovered node using quadtree - const node = renderer.findNodeAt(mousePos.current.x, mousePos.current.y); - if (node) { - setHoveredNode({ ...node, screenX: mousePos.current.x, screenY: mousePos.current.y }); + const hit = renderer.findNodeIndexAt(mousePos.current.x, mousePos.current.y); + if (hit) { + const origIdx = renderer.sortedIndexToOriginalIndex(hit.index); + const meta = origIdx === null ? null : nodesRef.current[origIdx]; + setHoveredNode({ + x: hit.x, + y: hit.y, + screenX: mousePos.current.x, + screenY: mousePos.current.y, + label: meta && typeof meta.label === "string" ? meta.label : undefined, + iri: meta && typeof meta.iri === "string" ? meta.iri : undefined, + }); } else { setHoveredNode(null); } @@ -234,9 +260,72 @@ export default function App() { // Sync selection state to renderer useEffect(() => { - if (rendererRef.current) { - rendererRef.current.updateSelection(selectedNodes); + const renderer = rendererRef.current; + if (!renderer) return; + + // Optimistically reflect selection immediately; neighbors will be filled in by backend. + renderer.updateSelection(selectedNodes, new Set()); + + // Invalidate any in-flight neighbor request for the previous selection. + const reqId = ++neighborsReqIdRef.current; + + // Convert selected sorted indices to backend node IDs (graph-export dense IDs). + const selectedIds: number[] = []; + for (const sortedIdx of selectedNodes) { + const origIdx = renderer.sortedIndexToOriginalIndex(sortedIdx); + if (origIdx === null) continue; + const nodeId = nodesRef.current?.[origIdx]?.id; + if (typeof nodeId === "number") selectedIds.push(nodeId); } + + if (selectedIds.length === 0) { + return; + } + + // Always send the full current selection list; backend returns the merged neighbor set. 
+ const ctrl = new AbortController(); + + (async () => { + try { + const meta = graphMetaRef.current; + const body = { + selected_ids: selectedIds, + node_limit: typeof meta?.node_limit === "number" ? meta.node_limit : undefined, + edge_limit: typeof meta?.edge_limit === "number" ? meta.edge_limit : undefined, + }; + + const res = await fetch("/api/neighbors", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(body), + signal: ctrl.signal, + }); + if (!res.ok) throw new Error(`POST /api/neighbors failed: ${res.status}`); + const data = await res.json(); + if (ctrl.signal.aborted) return; + if (reqId !== neighborsReqIdRef.current) return; + + const neighborIds: unknown = data?.neighbor_ids; + const neighborSorted = new Set<number>(); + if (Array.isArray(neighborIds)) { + for (const id of neighborIds) { + if (typeof id !== "number") continue; + const sorted = renderer.vertexIdToSortedIndexOrNull(id); + if (sorted === null) continue; + if (!selectedNodes.has(sorted)) neighborSorted.add(sorted); + } + } + + renderer.updateSelection(selectedNodes, neighborSorted); + } catch (e) { + if (ctrl.signal.aborted) return; + console.warn(e); + // Keep the UI usable even if neighbors fail to load. + renderer.updateSelection(selectedNodes, new Set()); + } + })(); + + return () => ctrl.abort(); }, [selectedNodes]); return ( @@ -350,7 +439,12 @@ export default function App() { boxShadow: "0 2px 8px rgba(0,0,0,0.5)", }} > - ({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)}) +
+ {hoveredNode.label || hoveredNode.iri || "(unknown)"} +
+
+ ({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)}) +
)} diff --git a/frontend/src/renderer.ts b/frontend/src/renderer.ts index 5ee2779..2c74169 100644 --- a/frontend/src/renderer.ts +++ b/frontend/src/renderer.ts @@ -80,9 +80,11 @@ export class Renderer { // Data private leaves: Leaf[] = []; private sorted: Float32Array = new Float32Array(0); + // order[sortedIdx] = originalIdx (original ordering matches input arrays) + private sortedToOriginal: Uint32Array = new Uint32Array(0); + private vertexIdToSortedIndex: Map<number, number> = new Map(); private nodeCount = 0; private edgeCount = 0; - private neighborMap: Map<number, number[]> = new Map(); private leafEdgeStarts: Uint32Array = new Uint32Array(0); private leafEdgeCounts: Uint32Array = new Uint32Array(0); private maxPtSize = 256; @@ -202,6 +204,7 @@ export class Renderer { const { sorted, leaves, order } = buildSpatialIndex(xs, ys); this.leaves = leaves; this.sorted = sorted; + this.sortedToOriginal = order; // Pre-allocate arrays for render loop (zero-allocation rendering) this.visibleLeafIndices = new Uint32Array(leaves.length); @@ -226,6 +229,13 @@ originalToSorted[order[i]] = i; } + // Build vertex ID → sorted index mapping (used by backend-driven neighbor highlighting) + const vertexIdToSortedIndex = new Map<number, number>(); + for (let i = 0; i < count; i++) { + vertexIdToSortedIndex.set(vertexIds[i], originalToSorted[i]); + } + this.vertexIdToSortedIndex = vertexIdToSortedIndex; + // Remap edges from vertex IDs to sorted indices const lineIndices = new Uint32Array(edgeCount * 2); let validEdges = 0; @@ -241,18 +251,6 @@ } this.edgeCount = validEdges; - // Build per-node neighbor list from edges for selection queries - const neighborMap = new Map<number, number[]>(); - for (let i = 0; i < validEdges; i++) { - const src = lineIndices[i * 2]; - const dst = lineIndices[i * 2 + 1]; - if (!neighborMap.has(src)) neighborMap.set(src, []); - neighborMap.get(src)!.push(dst); - if (!neighborMap.has(dst)) neighborMap.set(dst, []); - neighborMap.get(dst)!.push(src); - } - 
this.neighborMap = neighborMap; - // Build per-leaf edge index for efficient visible-only edge drawing // Find which leaf each sorted index belongs to const nodeToLeaf = new Uint32Array(count); @@ -331,6 +329,28 @@ export class Renderer { return this.nodeCount; } + /** + * Map a sorted buffer index (what findNodeIndexAt returns) back to the original + * index in the input arrays used to initialize the renderer. + */ + sortedIndexToOriginalIndex(sortedIndex: number): number | null { + if ( + sortedIndex < 0 || + sortedIndex >= this.sortedToOriginal.length + ) { + return null; + } + return this.sortedToOriginal[sortedIndex]; + } + + /** + * Convert a backend node ID (node.id from /api/graph) to a sorted index used by the renderer. + */ + vertexIdToSortedIndexOrNull(vertexId: number): number | null { + const idx = this.vertexIdToSortedIndex.get(vertexId); + return typeof idx === "number" ? idx : null; + } + /** * Convert screen coordinates (CSS pixels) to world coordinates. */ @@ -412,10 +432,10 @@ export class Renderer { /** * Update the selection buffer with the given set of node indices. - * Also computes neighbors of selected nodes. - * Call this whenever React's selection state changes. + * Neighbor indices are provided by the backend (SPARQL query) and uploaded separately. + * Call this whenever selection or backend neighbor results change. 
*/ - updateSelection(selectedIndices: Set<number>): void { + updateSelection(selectedIndices: Set<number>, neighborIndices: Set<number> = new Set()): void { const gl = this.gl; // Upload selected indices @@ -425,23 +445,11 @@ gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, indices, gl.DYNAMIC_DRAW); gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null); - // Compute neighbors of selected nodes (excluding already selected) - const neighborSet = new Set<number>(); - for (const nodeIdx of selectedIndices) { - const nodeNeighbors = this.neighborMap.get(nodeIdx); - if (!nodeNeighbors) continue; - for (const n of nodeNeighbors) { - if (!selectedIndices.has(n)) { - neighborSet.add(n); - } - } - } - // Upload neighbor indices - const neighborIndices = new Uint32Array(neighborSet); - this.neighborCount = neighborIndices.length; + const neighborIndexArray = new Uint32Array(neighborIndices); + this.neighborCount = neighborIndexArray.length; gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, this.neighborIbo); - gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndices, gl.DYNAMIC_DRAW); + gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndexArray, gl.DYNAMIC_DRAW); gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null); }