Import Solver + neighbors via sparql query

2026-03-04 13:49:14 -03:00
parent d4bfa5f064
commit a75b5b93da
15 changed files with 747 additions and 463 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,30 +0,0 @@
-# Choose which engine executes SPARQL:
-# - rdflib: parse TTL locally and query in-memory
-# - anzograph: query AnzoGraph over HTTP (optionally LOAD the TTL on startup)
-GRAPH_BACKEND=rdflib
-
-# Backend (rdflib) file location inside the container.
-# The TTL file must exist within the mounted ./data folder if you keep the default volume mount.
-TTL_PATH=/data/o3po.ttl
-
-# Backend behavior
-INCLUDE_BNODES=false
-# MAX_TRIPLES=1000000
-
-# AnzoGraph / SPARQL endpoint settings (used when GRAPH_BACKEND=anzograph)
-SPARQL_HOST=http://anzograph:8080
-# SPARQL_ENDPOINT=http://anzograph:8080/sparql
-SPARQL_USER=admin
-SPARQL_PASS=Passw0rd1
-
-# File URI as seen by the AnzoGraph container (used by SPARQL `LOAD`)
-SPARQL_DATA_FILE=file:///opt/shared-files/o3po.ttl
-# SPARQL_GRAPH_IRI=http://example.org/graph
-
-# Startup behavior for AnzoGraph mode
-SPARQL_LOAD_ON_START=false
-SPARQL_CLEAR_ON_START=false
-
-# Dev UX
-CORS_ORIGINS=http://localhost:5173
-VITE_BACKEND_URL=http://backend:8000
--- a/backend/app/README.md
+++ b/backend/app/README.md
@@ -32,6 +32,11 @@ Callers (frontend or other clients) interact with a single API surface (`/api/*`
  - Used by `/api/nodes`, `/api/edges`, and `rdflib`-mode `/api/stats`.
 - `pipelines/graph_snapshot.py`
  - Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL (works for both RDFLib and AnzoGraph).
+- `pipelines/layout_dag_radial.py`
+  - DAG layout helpers used by `pipelines/graph_snapshot.py`:
+    - cycle detection
+    - level-synchronous Kahn layering
+    - radial (ring-per-layer) positioning.
 - `pipelines/snapshot_service.py`
  - Snapshot cache layer used by `/api/graph` and `/api/stats` so the backend doesn't run expensive SPARQL twice.
 - `pipelines/subclass_labels.py`
@@ -64,6 +69,14 @@ RDFLib mode:
 - `TTL_PATH`: path inside the backend container to a `.ttl` file (example: `/data/o3po.ttl`)
 - `MAX_TRIPLES`: optional int; if set, stops parsing after this many triples

+Optional import-combining step (runs before the SPARQL engine starts):
+
+- `COMBINE_OWL_IMPORTS_ON_START`: `true` to recursively load `TTL_PATH` (or `COMBINE_ENTRY_LOCATION`) plus `owl:imports` and write a combined TTL file.
+- `COMBINE_ENTRY_LOCATION`: optional override for the entry file/URL to load (defaults to `TTL_PATH`)
+- `COMBINE_OUTPUT_LOCATION`: optional explicit output path (defaults to `${dirname(entry)}/${COMBINE_OUTPUT_NAME}`)
+- `COMBINE_OUTPUT_NAME`: output filename when `COMBINE_OUTPUT_LOCATION` is not set (default: `combined_ontology.ttl`)
+- `COMBINE_FORCE`: `true` to rebuild even if the output file already exists
+
 AnzoGraph mode:

 - `SPARQL_HOST`: base host (example: `http://anzograph:8080`)
@@ -129,8 +142,8 @@ Returned in `nodes[]` (dense IDs; suitable for indexing in typed arrays):
 - `id`: integer dense node ID used in edges
 - `termType`: `"uri"` or `"bnode"`
 - `iri`: URI string; blank nodes are normalized to `_:<id>`
-- `label`: currently `null` in `/api/graph` snapshots (pipelines can be used to populate later)
-- `x`/`y`: world-space coordinates for rendering (currently a deterministic spiral layout)
+- `label`: `rdfs:label` when available (best-effort; prefers English)
+- `x`/`y`: world-space coordinates for rendering (currently a radial layered layout derived from `rdfs:subClassOf`)

 ### Edge

@@ -149,11 +162,10 @@ Returned in `edges[]`:

 ## Snapshot Query (`/api/graph`)

-`/api/graph` uses a SPARQL query that:
+`/api/graph` currently uses a SPARQL query that returns only `rdf:type` edges whose object is an `owl:Class` and `rdfs:subClassOf` edges:

-- selects triples `?s ?p ?o`
-- excludes literal objects (`FILTER(!isLiteral(?o))`)
-- excludes `rdfs:label`, `skos:prefLabel`, and `skos:altLabel` predicates
+- selects bindings as `?s ?p ?o` (with `?p` bound to `rdf:type` or `rdfs:subClassOf`)
+- excludes literal objects (`FILTER(!isLiteral(?o))`) for safety
 - optionally excludes blank nodes (unless `INCLUDE_BNODES=true`)
 - applies `LIMIT edge_limit`

@@ -161,6 +173,8 @@ The result bindings are mapped to dense node IDs (first-seen order) and returned

 `/api/graph` also returns `meta` with snapshot counts and engine info so the frontend doesn't need to call `/api/stats`.

+If a cycle is detected in the returned `rdfs:subClassOf` snapshot, `/api/graph` returns HTTP 422 (layout requires a DAG).
+
 ## Pipelines

 ### `pipelines/graph_snapshot.py`
--- a/backend/app/graph_export.py
+++ b/backend/app/graph_export.py
@@ -5,16 +5,25 @@ from typing import Any

 def edge_retrieval_query(*, edge_limit: int, include_bnodes: bool) -> str:
    bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
+    
    return f"""
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+
 SELECT ?s ?p ?o
 WHERE {{
-  ?s ?p ?o .
+  {{
+    VALUES ?p {{ rdf:type }}
+    ?s ?p ?o .
+    ?o rdf:type owl:Class .
+  }}
+  UNION
+  {{
+    VALUES ?p {{ rdfs:subClassOf }}
+    ?s ?p ?o .
+  }}
  FILTER(!isLiteral(?o))
-  FILTER(?p NOT IN (
-    <http://www.w3.org/2000/01/rdf-schema#label>,
-    <http://www.w3.org/2004/02/skos/core#prefLabel>,
-    <http://www.w3.org/2004/02/skos/core#altLabel>
-  ))
  {bnode_filter}
 }}
 LIMIT {edge_limit}
@@ -91,4 +100,3 @@ def graph_from_sparql_bindings(
    ]

    return out_nodes, out_edges
-
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,11 +1,29 @@
 from __future__ import annotations

 from contextlib import asynccontextmanager
+import logging
+import asyncio

 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware

-from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
+from .models import (
+    EdgesResponse,
+    GraphResponse,
+    NeighborsRequest,
+    NeighborsResponse,
+    NodesResponse,
+    SparqlQueryRequest,
+    StatsResponse,
+)
+from .pipelines.layout_dag_radial import CycleError
+from .pipelines.owl_imports_combiner import (
+    build_combined_graph,
+    output_location_to_path,
+    resolve_output_location,
+    serialize_graph_to_ttl,
+)
+from .pipelines.selection_neighbors import fetch_neighbor_ids_for_selection
 from .pipelines.snapshot_service import GraphSnapshotService
 from .rdf_store import RDFStore
 from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine
@@ -13,11 +31,33 @@ from .settings import Settings


 settings = Settings()
+logger = logging.getLogger(__name__)


@asynccontextmanager
 async def lifespan(app: FastAPI):
-    sparql: SparqlEngine = create_sparql_engine(settings)
+    rdflib_preloaded_graph = None
+
+    if settings.combine_owl_imports_on_start:
+        entry_location = settings.combine_entry_location or settings.ttl_path
+        output_location = resolve_output_location(
+            entry_location,
+            output_location=settings.combine_output_location,
+            output_name=settings.combine_output_name,
+        )
+
+        output_path = output_location_to_path(output_location)
+        if output_path.exists() and not settings.combine_force:
+            logger.info("Skipping combine step (output exists): %s", output_location)
+        else:
+            rdflib_preloaded_graph = await asyncio.to_thread(build_combined_graph, entry_location)
+            logger.info("Finished combining imports; serializing to: %s", output_location)
+            await asyncio.to_thread(serialize_graph_to_ttl, rdflib_preloaded_graph, output_location)
+
+        if settings.graph_backend == "rdflib":
+            settings.ttl_path = str(output_path)
+
+    sparql: SparqlEngine = create_sparql_engine(settings, rdflib_graph=rdflib_preloaded_graph)
    await sparql.startup()
    app.state.sparql = sparql
    app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings)
@@ -62,7 +102,10 @@ def health() -> dict[str, str]:
 async def stats() -> StatsResponse:
    # Stats reflect exactly what we send to the frontend (/api/graph), not global graph size.
    svc: GraphSnapshotService = app.state.snapshot_service
-    snap = await svc.get(node_limit=50_000, edge_limit=100_000)
+    try:
+        snap = await svc.get(node_limit=50_000, edge_limit=100_000)
+    except CycleError as e:
+        raise HTTPException(status_code=422, detail=str(e)) from None
    meta = snap.meta
    return StatsResponse(
        backend=meta.backend if meta else app.state.sparql.name,
@@ -81,6 +124,20 @@ async def sparql_query(req: SparqlQueryRequest) -> dict:
    return data


+@app.post("/api/neighbors", response_model=NeighborsResponse)
+async def neighbors(req: NeighborsRequest) -> NeighborsResponse:
+    svc: GraphSnapshotService = app.state.snapshot_service
+    snap = await svc.get(node_limit=req.node_limit, edge_limit=req.edge_limit)
+    sparql: SparqlEngine = app.state.sparql
+    neighbor_ids = await fetch_neighbor_ids_for_selection(
+        sparql,
+        snapshot=snap,
+        selected_ids=req.selected_ids,
+        include_bnodes=settings.include_bnodes,
+    )
+    return NeighborsResponse(selected_ids=req.selected_ids, neighbor_ids=neighbor_ids)
+
+
@app.get("/api/nodes", response_model=NodesResponse)
 def nodes(
    limit: int = Query(default=10_000, ge=1, le=200_000),
@@ -109,4 +166,7 @@ async def graph(
    edge_limit: int = Query(default=100_000, ge=1, le=500_000),
 ) -> GraphResponse:
    svc: GraphSnapshotService = app.state.snapshot_service
-    return await svc.get(node_limit=node_limit, edge_limit=edge_limit)
+    try:
+        return await svc.get(node_limit=node_limit, edge_limit=edge_limit)
+    except CycleError as e:
+        raise HTTPException(status_code=422, detail=str(e)) from None
--- a/backend/app/models.py
+++ b/backend/app/models.py
@@ -56,3 +56,14 @@ class GraphResponse(BaseModel):

 class SparqlQueryRequest(BaseModel):
    query: str
+
+
+class NeighborsRequest(BaseModel):
+    """Request body for `POST /api/neighbors`."""
+
+    # Dense node IDs, resolved against the nodes of the graph snapshot.
+    selected_ids: list[int]
+    # Limits passed to the snapshot service to obtain that snapshot.
+    node_limit: int = 50_000
+    edge_limit: int = 100_000
+
+
+class NeighborsResponse(BaseModel):
+    """Response body for `POST /api/neighbors`."""
+
+    # Echo of the IDs sent in the request.
+    selected_ids: list[int]
+    # Snapshot node IDs adjacent to the selection (selected nodes excluded), sorted ascending.
+    neighbor_ids: list[int]
--- a/backend/app/pipelines/graph_snapshot.py
+++ b/backend/app/pipelines/graph_snapshot.py
@@ -1,10 +1,64 @@
 from __future__ import annotations

+from typing import Any
+
 from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
 from ..models import GraphResponse
 from ..sparql_engine import SparqlEngine
 from ..settings import Settings
-from .layout_spiral import spiral_positions
+from .layout_dag_radial import CycleError, level_synchronous_kahn_layers, radial_positions_from_layers
+
+
+RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
+
+
+def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
+    return (((res.get("results") or {}).get("bindings")) or [])
+
+
+def _label_score(label_binding: dict[str, Any]) -> int:
+    # Prefer English, then no-language, then anything else.
+    lang = (label_binding.get("xml:lang") or "").lower()
+    if lang == "en":
+        return 3
+    if lang == "":
+        return 2
+    return 1
+
+
+async def _fetch_rdfs_labels_for_iris(
+    sparql: SparqlEngine,
+    iris: list[str],
+    *,
+    batch_size: int = 500,
+) -> dict[str, str]:
+    best: dict[str, tuple[int, str]] = {}
+
+    for i in range(0, len(iris), batch_size):
+        batch = iris[i : i + batch_size]
+        values = " ".join(f"<{u}>" for u in batch)
+        q = f"""
+SELECT ?s ?label
+WHERE {{
+  VALUES ?s {{ {values} }}
+  ?s <{RDFS_LABEL}> ?label .
+}}
+"""
+        res = await sparql.query_json(q)
+        for b in _bindings(res):
+            s = (b.get("s") or {}).get("value")
+            label_term = b.get("label") or {}
+            if not s or label_term.get("type") != "literal":
+                continue
+            label_value = label_term.get("value")
+            if label_value is None:
+                continue
+            score = _label_score(label_term)
+            prev = best.get(s)
+            if prev is None or score > prev[0]:
+                best[s] = (score, str(label_value))
+
+    return {iri: lbl for iri, (_, lbl) in best.items()}


 async def fetch_graph_snapshot(
@@ -28,11 +82,59 @@ async def fetch_graph_snapshot(
    )

    # Add positions so the frontend doesn't need to run a layout.
-    xs, ys = spiral_positions(len(nodes))
+    #
+    # We are exporting only rdfs:subClassOf triples. In the exported edges:
+    #   source = subclass, target = superclass
+    # For hierarchical layout we invert edges to:
+    #   superclass -> subclass
+    hier_edges: list[tuple[int, int]] = []
+    for e in edges:
+        s = e.get("source")
+        t = e.get("target")
+        try:
+            sid = int(s)  # subclass
+            tid = int(t)  # superclass
+        except Exception:
+            continue
+        hier_edges.append((tid, sid))
+
+    try:
+        layers = level_synchronous_kahn_layers(node_count=len(nodes), edges=hier_edges)
+    except CycleError as e:
+        # Add a small URI sample to aid debugging.
+        sample: list[str] = []
+        for nid in e.remaining_node_ids[:20]:
+            try:
+                sample.append(str(nodes[nid].get("iri")))
+            except Exception:
+                continue
+        raise CycleError(
+            processed=e.processed,
+            total=e.total,
+            remaining_node_ids=e.remaining_node_ids,
+            remaining_iri_sample=sample or None,
+        ) from None
+
+    # Deterministic order within each ring/layer for stable layouts.
+    id_to_iri = [str(n.get("iri", "")) for n in nodes]
+    for layer in layers:
+        layer.sort(key=lambda nid: id_to_iri[nid])
+
+    xs, ys = radial_positions_from_layers(node_count=len(nodes), layers=layers)
    for i, node in enumerate(nodes):
        node["x"] = float(xs[i])
        node["y"] = float(ys[i])

+    # Attach labels for URI nodes (blank nodes remain label-less).
+    uri_nodes = [n for n in nodes if n.get("termType") == "uri"]
+    if uri_nodes:
+        iris = [str(n["iri"]) for n in uri_nodes if isinstance(n.get("iri"), str)]
+        label_by_iri = await _fetch_rdfs_labels_for_iris(sparql, iris)
+        for n in uri_nodes:
+            iri = n.get("iri")
+            if isinstance(iri, str) and iri in label_by_iri:
+                n["label"] = label_by_iri[iri]
+
    meta = GraphResponse.Meta(
        backend=sparql.name,
        ttl_path=settings.ttl_path if settings.graph_backend == "rdflib" else None,
--- a/backend/app/pipelines/layout_dag_radial.py
+++ b/backend/app/pipelines/layout_dag_radial.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import math
+from collections import deque
+from typing import Iterable, Sequence
+
+
+class CycleError(RuntimeError):
+    """
+    Raised when the requested layout requires a DAG, but a cycle is detected.
+
+    `remaining_node_ids` are the node ids that still had indegree > 0 after Kahn.
+    """
+
+    def __init__(
+        self,
+        *,
+        processed: int,
+        total: int,
+        remaining_node_ids: list[int],
+        remaining_iri_sample: list[str] | None = None,
+    ) -> None:
+        self.processed = int(processed)
+        self.total = int(total)
+        self.remaining_node_ids = remaining_node_ids
+        self.remaining_iri_sample = remaining_iri_sample
+
+        msg = f"Cycle detected in subClassOf graph (processed {self.processed}/{self.total} nodes)."
+        if remaining_iri_sample:
+            msg += f" Example nodes: {', '.join(remaining_iri_sample)}"
+        super().__init__(msg)
+
+
+def level_synchronous_kahn_layers(
+    *,
+    node_count: int,
+    edges: Iterable[tuple[int, int]],
+) -> list[list[int]]:
+    """
+    Level-synchronous Kahn's algorithm:
+    - process the entire current queue as one batch (one layer)
+    - only then enqueue newly-unlocked nodes for the next batch
+
+    `edges` are directed (u -> v).
+    """
+    n = int(node_count)
+    if n <= 0:
+        return []
+
+    adj: list[list[int]] = [[] for _ in range(n)]
+    indeg = [0] * n
+
+    for u, v in edges:
+        if u == v:
+            # Self-loops don't help layout and would trivially violate DAG-ness.
+            continue
+        if not (0 <= u < n and 0 <= v < n):
+            continue
+        adj[u].append(v)
+        indeg[v] += 1
+
+    q: deque[int] = deque(i for i, d in enumerate(indeg) if d == 0)
+    layers: list[list[int]] = []
+
+    processed = 0
+    while q:
+        # Consume the full current queue as a single layer.
+        layer = list(q)
+        q.clear()
+        layers.append(layer)
+
+        for u in layer:
+            processed += 1
+            for v in adj[u]:
+                indeg[v] -= 1
+                if indeg[v] == 0:
+                    q.append(v)
+
+    if processed != n:
+        remaining = [i for i, d in enumerate(indeg) if d > 0]
+        raise CycleError(processed=processed, total=n, remaining_node_ids=remaining)
+
+    return layers
+
+
+def radial_positions_from_layers(
+    *,
+    node_count: int,
+    layers: Sequence[Sequence[int]],
+    max_r: float = 5000.0,
+) -> tuple[list[float], list[float]]:
+    """
+    Assign node positions in concentric rings (one ring per layer).
+
+    - radius increases with layer index
+    - nodes within a layer are placed evenly by angle
+    - each ring gets a "golden-angle" rotation to reduce spoke artifacts
+    """
+    n = int(node_count)
+    if n <= 0:
+        return ([], [])
+
+    xs = [0.0] * n
+    ys = [0.0] * n
+    if not layers:
+        return (xs, ys)
+
+    two_pi = 2.0 * math.pi
+    golden = math.pi * (3.0 - math.sqrt(5.0))
+
+    layer_count = len(layers)
+    denom = float(layer_count + 1)
+
+    for li, layer in enumerate(layers):
+        m = len(layer)
+        if m <= 0:
+            continue
+
+        # Keep everything within ~[-max_r, max_r] like the previous spiral layout.
+        r = ((li + 1) / denom) * max_r
+
+        # Rotate each layer deterministically to avoid radial spokes aligning.
+        offset = (li * golden) % two_pi
+
+        if m == 1:
+            nid = int(layer[0])
+            if 0 <= nid < n:
+                xs[nid] = r * math.cos(offset)
+                ys[nid] = r * math.sin(offset)
+            continue
+
+        step = two_pi / float(m)
+        for j, raw_id in enumerate(layer):
+            nid = int(raw_id)
+            if not (0 <= nid < n):
+                continue
+            t = offset + step * float(j)
+            xs[nid] = r * math.cos(t)
+            ys[nid] = r * math.sin(t)
+
+    return (xs, ys)
--- a/backend/app/pipelines/owl_imports_combiner.py
+++ b/backend/app/pipelines/owl_imports_combiner.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from urllib.parse import unquote, urlparse
+
+from rdflib import Graph
+from rdflib.namespace import OWL
+
+
+logger = logging.getLogger(__name__)
+
+
+def _is_http_url(location: str) -> bool:
+    scheme = urlparse(location).scheme.lower()
+    return scheme in {"http", "https"}
+
+
+def _is_file_uri(location: str) -> bool:
+    return urlparse(location).scheme.lower() == "file"
+
+
+def _file_uri_to_path(location: str) -> Path:
+    u = urlparse(location)
+    if u.scheme.lower() != "file":
+        raise ValueError(f"Not a file:// URI: {location!r}")
+    return Path(unquote(u.path))
+
+
+def resolve_output_location(
+    entry_location: str,
+    *,
+    output_location: str | None,
+    output_name: str,
+) -> str:
+    if output_location:
+        return output_location
+
+    if _is_http_url(entry_location):
+        raise ValueError(
+            "COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
+        )
+
+    entry_path = _file_uri_to_path(entry_location) if _is_file_uri(entry_location) else Path(entry_location)
+    return str(entry_path.parent / output_name)
+
+
+def _output_destination_to_path(output_location: str) -> Path:
+    if _is_file_uri(output_location):
+        return _file_uri_to_path(output_location)
+    if _is_http_url(output_location):
+        raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
+    return Path(output_location)
+
+
+def output_location_to_path(output_location: str) -> Path:
+    """Public wrapper around `_output_destination_to_path`; returns its result unchanged."""
+    return _output_destination_to_path(output_location)
+
+
+def build_combined_graph(entry_location: str) -> Graph:
+    """
+    Recursively loads an RDF document (file path, file:// URI, or http(s) URL) and its
+    owl:imports into a single in-memory graph.
+    """
+    combined_graph = Graph()
+    visited_locations: set[str] = set()
+
+    def resolve_imports(location: str) -> None:
+        if location in visited_locations:
+            return
+        visited_locations.add(location)
+
+        logger.info("Loading ontology: %s", location)
+        try:
+            combined_graph.parse(location=location)
+        except Exception as e:
+            logger.warning("Failed to load %s (%s)", location, e)
+            return
+
+        imports = [str(o) for _, _, o in combined_graph.triples((None, OWL.imports, None))]
+        for imported_location in imports:
+            if imported_location not in visited_locations:
+                resolve_imports(imported_location)
+
+    resolve_imports(entry_location)
+    return combined_graph
+
+
+def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
+    output_path = _output_destination_to_path(output_location)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
+    graph.serialize(destination=str(tmp_path), format="turtle")
+    os.replace(str(tmp_path), str(output_path))
--- a/backend/app/pipelines/selection_neighbors.py
+++ b/backend/app/pipelines/selection_neighbors.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+from typing import Any, Iterable
+
+from ..models import GraphResponse, Node
+from ..sparql_engine import SparqlEngine
+
+
+def _values_term(node: Node) -> str | None:
+    iri = node.iri
+    if node.termType == "uri":
+        return f"<{iri}>"
+    if node.termType == "bnode":
+        if iri.startswith("_:"):
+            return iri
+        return f"_:{iri}"
+    return None
+
+
+def selection_neighbors_query(*, selected_nodes: Iterable[Node], include_bnodes: bool) -> str:
+    values_terms: list[str] = []
+    for n in selected_nodes:
+        t = _values_term(n)
+        if t is None:
+            continue
+        values_terms.append(t)
+
+    if not values_terms:
+        # Caller should avoid running this query when selection is empty, but keep this safe.
+        return "SELECT ?nbr WHERE { FILTER(false) }"
+
+    bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?nbr))"
+    values = " ".join(values_terms)
+
+    # Neighbors are defined as any node directly connected by rdf:type (to owl:Class)
+    # or rdfs:subClassOf, in either direction (treating edges as undirected).
+    return f"""
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+
+SELECT DISTINCT ?nbr
+WHERE {{
+  VALUES ?sel {{ {values} }}
+  {{
+    ?sel rdf:type ?o .
+    ?o rdf:type owl:Class .
+    BIND(?o AS ?nbr)
+  }}
+  UNION
+  {{
+    ?s rdf:type ?sel .
+    ?sel rdf:type owl:Class .
+    BIND(?s AS ?nbr)
+  }}
+  UNION
+  {{
+    ?sel rdfs:subClassOf ?o .
+    BIND(?o AS ?nbr)
+  }}
+  UNION
+  {{
+    ?s rdfs:subClassOf ?sel .
+    BIND(?s AS ?nbr)
+  }}
+  FILTER(!isLiteral(?nbr))
+  FILTER(?nbr != ?sel)
+  {bnode_filter}
+}}
+"""
+
+
+def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
+    return (((res.get("results") or {}).get("bindings")) or [])
+
+
+def _term_key(term: dict[str, Any], *, include_bnodes: bool) -> tuple[str, str] | None:
+    t = term.get("type")
+    v = term.get("value")
+    if not t or v is None:
+        return None
+    if t == "literal":
+        return None
+    if t == "bnode":
+        if not include_bnodes:
+            return None
+        return ("bnode", f"_:{v}")
+    return ("uri", str(v))
+
+
+async def fetch_neighbor_ids_for_selection(
+    sparql: SparqlEngine,
+    *,
+    snapshot: GraphResponse,
+    selected_ids: list[int],
+    include_bnodes: bool,
+) -> list[int]:
+    id_to_node: dict[int, Node] = {n.id: n for n in snapshot.nodes}
+
+    selected_nodes: list[Node] = []
+    selected_id_set: set[int] = set()
+    for nid in selected_ids:
+        if not isinstance(nid, int):
+            continue
+        n = id_to_node.get(nid)
+        if n is None:
+            continue
+        if n.termType == "bnode" and not include_bnodes:
+            continue
+        selected_nodes.append(n)
+        selected_id_set.add(nid)
+
+    if not selected_nodes:
+        return []
+
+    key_to_id: dict[tuple[str, str], int] = {}
+    for n in snapshot.nodes:
+        key_to_id[(n.termType, n.iri)] = n.id
+
+    q = selection_neighbors_query(selected_nodes=selected_nodes, include_bnodes=include_bnodes)
+    res = await sparql.query_json(q)
+
+    neighbor_ids: set[int] = set()
+    for b in _bindings(res):
+        nbr_term = b.get("nbr") or {}
+        key = _term_key(nbr_term, include_bnodes=include_bnodes)
+        if key is None:
+            continue
+        nid = key_to_id.get(key)
+        if nid is None:
+            continue
+        if nid in selected_id_set:
+            continue
+        neighbor_ids.add(nid)
+
+    # Stable ordering for consistent frontend behavior.
+    return sorted(neighbor_ids)
--- a/backend/app/settings.py
+++ b/backend/app/settings.py
@@ -16,6 +16,13 @@ class Settings(BaseSettings):
    include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES")
    max_triples: int | None = Field(default=None, alias="MAX_TRIPLES")

+    # Optional: Combine owl:imports into a single TTL file on backend startup.
+    combine_owl_imports_on_start: bool = Field(default=False, alias="COMBINE_OWL_IMPORTS_ON_START")
+    combine_entry_location: str | None = Field(default=None, alias="COMBINE_ENTRY_LOCATION")
+    combine_output_location: str | None = Field(default=None, alias="COMBINE_OUTPUT_LOCATION")
+    combine_output_name: str = Field(default="combined_ontology.ttl", alias="COMBINE_OUTPUT_NAME")
+    combine_force: bool = Field(default=False, alias="COMBINE_FORCE")
+
    # AnzoGraph / SPARQL endpoint configuration
    sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST")
    # If not set, the backend uses `${SPARQL_HOST}/sparql`.
--- a/backend/app/sparql_engine.py
+++ b/backend/app/sparql_engine.py
@@ -24,11 +24,13 @@ class SparqlEngine(Protocol):
 class RdflibEngine:
    name = "rdflib"

-    def __init__(self, *, ttl_path: str):
+    def __init__(self, *, ttl_path: str, graph: Graph | None = None):
        self.ttl_path = ttl_path
-        self.graph: Graph | None = None
+        self.graph: Graph | None = graph

    async def startup(self) -> None:
+        if self.graph is not None:
+            return
        g = Graph()
        g.parse(self.ttl_path, format="turtle")
        self.graph = g
@@ -167,9 +169,9 @@ class AnzoGraphEngine:
        raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err


-def create_sparql_engine(settings: Settings) -> SparqlEngine:
+def create_sparql_engine(settings: Settings, *, rdflib_graph: Graph | None = None) -> SparqlEngine:
    if settings.graph_backend == "rdflib":
-        return RdflibEngine(ttl_path=settings.ttl_path)
+        return RdflibEngine(ttl_path=settings.ttl_path, graph=rdflib_graph)
    if settings.graph_backend == "anzograph":
        return AnzoGraphEngine(settings=settings)
    raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}")
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,9 +21,14 @@ services:
      - SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30}
      - SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4}
      - SPARQL_READY_TIMEOUT_S=${SPARQL_READY_TIMEOUT_S:-10}
+      - COMBINE_OWL_IMPORTS_ON_START=${COMBINE_OWL_IMPORTS_ON_START:-false}
+      - COMBINE_ENTRY_LOCATION
+      - COMBINE_OUTPUT_LOCATION
+      - COMBINE_OUTPUT_NAME
+      - COMBINE_FORCE=${COMBINE_FORCE:-false}
    volumes:
      - ./backend:/app
-      - ./data:/data:ro
+      - ./data:/data:Z
    command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health').read()"]
--- a/docs/anzograph-readiness-julia.md
+++ b/docs/anzograph-readiness-julia.md
@@ -1,371 +0,0 @@
-# Waiting for AnzoGraph readiness from Julia (how this repo does it)
-
-This repo runs a Julia pipeline (`julia/main.jl`) against an AnzoGraph SPARQL endpoint. The key problem is that **“container started” ≠ “SPARQL endpoint is ready to accept queries”**.
-
-So, before the Julia code does anything that depends on SPARQL (like `LOAD <...>` or large `SELECT`s), it explicitly **waits until AnzoGraph is actually responding to a real SPARQL POST request with valid JSON results**.
-
-This document explains the exact mechanism used here, why it works, and gives copy/paste-ready patterns you can transfer to another project.
-
---
-
-## 1) Where the waiting happens (pipeline control flow)
-
-In `julia/main.jl`, the entrypoint calls:
-
-```julia
-# Step 1: Wait for AnzoGraph
-wait_for_anzograph()
-
-# Step 2: Load TTL file
-result = sparql_update("LOAD <$SPARQL_DATA_FILE>")
-```
-
-So the “await” is not a Julia `Task`/`async` wait; it is a **blocking retry loop** that only returns when it can successfully execute a small SPARQL query.
-
-Reference: `julia/main.jl` defines `wait_for_anzograph()` and calls it from `main()`.
-
---
-
-## 2) Why this is needed even with Docker Compose `depends_on`
-
-This repo’s `docker-compose.yml` includes an AnzoGraph `healthcheck`:
-
-```yaml
-anzograph:
-  healthcheck:
-    test: ["CMD-SHELL", "curl -f http://localhost:8080/sparql || exit 1"]
-    interval: 10s
-    timeout: 5s
-    retries: 30
-    start_period: 60s
-```
-
-However, `julia-layout` currently depends on `anzograph` with:
-
-```yaml
-depends_on:
-  anzograph:
-    condition: service_started
-```
-
-Meaning:
- Compose will ensure the **container process has started**.
- Compose does **not** guarantee the AnzoGraph HTTP/SPARQL endpoint is ready (unless you use `service_healthy`, and even then a “healthy GET” is not always equivalent to “SPARQL POST works with auth + JSON”).
-
-So the Julia code includes its own readiness gate to prevent failures like:
- TCP connection refused (port not open yet)
- HTTP endpoint reachable but not fully initialized
- Non-JSON/HTML error responses while the service is still booting
-
---
-
-## 3) What “ready” means in this repo
-
-In this repo, “AnzoGraph is ready” means:
-
-1. An HTTP `POST` to `${SPARQL_HOST}/sparql` succeeds, with headers:
-   - `Content-Type: application/x-www-form-urlencoded`
-   - `Accept: application/sparql-results+json`
-   - `Authorization: Basic ...`
-2. The body parses as SPARQL JSON results (`application/sparql-results+json`)
-
-It does **not** strictly mean:
- Your dataset is already loaded
- The loaded data is fully indexed (that can matter in some systems after `LOAD`)
-
-This repo uses readiness as a **“SPARQL endpoint is alive and speaking the protocol”** check.
-
---
-
-## 4) The actual Julia implementation (as in `julia/main.jl`)
-
-### 4.1 Configuration (endpoint + auth)
-
-The Julia script builds endpoint and auth from environment variables:
-
-```julia
-const SPARQL_HOST = get(ENV, "SPARQL_HOST", "http://localhost:8080")
-const SPARQL_ENDPOINT = "$SPARQL_HOST/sparql"
-const SPARQL_USER = get(ENV, "SPARQL_USER", "admin")
-const SPARQL_PASS = get(ENV, "SPARQL_PASS", "Passw0rd1")
-const AUTH_HEADER = "Basic " * base64encode("$SPARQL_USER:$SPARQL_PASS")
-```
-
-In Docker Compose for this repo, the Julia container overrides `SPARQL_HOST` to use the service DNS name:
-
-```yaml
-environment:
-  - SPARQL_HOST=http://anzograph:8080
-```
-
-### 4.2 The smoke query used for readiness
-
-This is the query used in the wait loop:
-
-```julia
-const SMOKE_TEST_QUERY = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 3"
-```
-
-Notes:
- It’s intentionally small (`LIMIT 3`) to keep the readiness check cheap.
- It returns *some* bindings when data exists, but **even an empty dataset can still return a valid empty result set**. The code treats “valid response” as ready.
-
-If you want a readiness check that does not depend on any data being present, an `ASK` query is also common:
-
-```sparql
-ASK WHERE { ?s ?p ?o }
-```
-
-### 4.3 SPARQL query function (request + minimal retry)
-
-`sparql_query(query; retries=...)` is a generic helper that makes SPARQL POST requests:
-
-```julia
-function sparql_query(query::String; retries::Int=5)::SparqlResult
-    for attempt in 1:retries
-        try
-            response = HTTP.post(
-                SPARQL_ENDPOINT,
-                [
-                    "Content-Type" => "application/x-www-form-urlencoded",
-                    "Accept" => "application/sparql-results+json",
-                    "Authorization" => AUTH_HEADER
-                ];
-                body = "query=" * HTTP.URIs.escapeuri(query)
-            )
-
-            if response.status == 200
-                json = JSON.parse(String(response.body))
-                return SparqlResult(json["results"]["bindings"])
-            elseif response.status >= 500 && attempt < retries
-                sleep(10)
-                continue
-            else
-                error("SPARQL query failed with status $(response.status)")
-            end
-        catch e
-            if attempt < retries
-                sleep(10)
-                continue
-            end
-            rethrow(e)
-        end
-    end
-    error("SPARQL query failed after $retries attempts")
-end
-```
-
-Important behaviors to preserve when transferring:
- It uses **POST** (not GET) to the SPARQL endpoint.
- It requires a **200** response and successfully parses SPARQL JSON results.
- It retries on:
-  - `>= 500` server errors
-  - network / protocol / parsing errors (caught exceptions)
-
-### 4.4 The readiness gate: `wait_for_anzograph`
-
-This is the “await until ready” logic:
-
-```julia
-function wait_for_anzograph(max_retries::Int=30)::Bool
-    println("Waiting for AnzoGraph at $SPARQL_ENDPOINT...")
-
-    for attempt in 1:max_retries
-        try
-            smoke_result = sparql_query(SMOKE_TEST_QUERY; retries=1)
-            println("  AnzoGraph is ready (attempt $attempt, smoke rows=$(length(smoke_result.bindings)))")
-            return true
-        catch e
-            println("  Attempt $attempt/$max_retries: $(typeof(e))")
-            sleep(4)
-        end
-    end
-
-    error("AnzoGraph not available after $max_retries attempts")
-end
-```
-
-Why it calls `sparql_query(...; retries=1)`:
- It makes each outer “readiness attempt” a **single** request.
- The outer loop controls cadence (`sleep(4)`) and total wait time.
- This avoids “nested retry loops” (inner sleeps + outer sleeps) that can make waits much longer than intended.
-
-Time bound in the current implementation:
- `max_retries = 30`
- `sleep(4)` between attempts
- Roughly ~120 seconds of waiting (plus request time).
-
---
-
-## 5) What failures cause it to keep waiting
-
-`wait_for_anzograph()` catches any exception thrown by `sparql_query()` and retries. In practice, that includes:
-
- **Connection errors** (DNS not ready, connection refused, etc.)
- **Timeouts** (if HTTP request takes too long and the library throws)
- **Non-200 HTTP statuses** that cause `error(...)`
- **Non-JSON / unexpected JSON** responses causing `JSON.parse(...)` to throw
-
-That last point is a big reason a “real SPARQL request + parse” is stronger than just “ping the port”.
-
---
-
-## 6) Transferable, self-contained version (recommended pattern)
-
-If you want to reuse this in another project, it’s usually easier to:
- avoid globals,
- make endpoint/auth explicit,
- use a **time-based timeout** instead of `max_retries` (more robust),
- add request timeouts so the wait loop can’t hang forever on a single request.
-
-Below is a drop-in module you can copy into your project.
-
-```julia
-module AnzoGraphReady
-
-using HTTP
-using JSON
-using Base64
-using Dates
-
-struct SparqlResult
-    bindings::Vector{Dict{String, Any}}
-end
-
-function basic_auth_header(user::AbstractString, pass::AbstractString)::String
-    return "Basic " * base64encode("$(user):$(pass)")
-end
-
-function sparql_query(
-    endpoint::AbstractString,
-    auth_header::AbstractString,
-    query::AbstractString;
-    retries::Int = 1,
-    retry_sleep_s::Real = 2,
-    request_timeout_s::Real = 15,
-)::SparqlResult
-    for attempt in 1:retries
-        try
-            response = HTTP.post(
-                String(endpoint),
-                [
-                    "Content-Type" => "application/x-www-form-urlencoded",
-                    "Accept" => "application/sparql-results+json",
-                    "Authorization" => auth_header,
-                ];
-                body = "query=" * HTTP.URIs.escapeuri(String(query)),
-                readtimeout = request_timeout_s,
-            )
-
-            if response.status != 200
-                error("SPARQL query failed with status $(response.status)")
-            end
-
-            parsed = JSON.parse(String(response.body))
-            bindings = get(get(parsed, "results", Dict()), "bindings", Any[])
-            return SparqlResult(Vector{Dict{String, Any}}(bindings))
-        catch e
-            if attempt < retries
-                sleep(retry_sleep_s)
-                continue
-            end
-            rethrow(e)
-        end
-    end
-    error("sparql_query: unreachable")
-end
-
-"""
-Wait until AnzoGraph responds to a real SPARQL POST with parseable JSON.
-
-This is the direct analog of this repo's `wait_for_anzograph()`, but with:
- a time-based timeout (`timeout`)
- a request timeout per attempt (`request_timeout_s`)
- simple exponential backoff
-"""
-function wait_for_anzograph(
-    endpoint::AbstractString,
-    auth_header::AbstractString;
-    timeout::Period = Minute(3),
-    initial_delay_s::Real = 0.5,
-    max_delay_s::Real = 5.0,
-    request_timeout_s::Real = 10.0,
-    query::AbstractString = "ASK WHERE { ?s ?p ?o }",
-)::Nothing
-    deadline = now() + timeout
-    delay_s = initial_delay_s
-
-    while now() < deadline
-        try
-            # A single attempt: if it succeeds, we declare "ready".
-            sparql_query(
-                endpoint,
-                auth_header,
-                query;
-                retries = 1,
-                request_timeout_s = request_timeout_s,
-            )
-            return
-        catch
-            sleep(delay_s)
-            delay_s = min(max_delay_s, delay_s * 1.5)
-        end
-    end
-
-    error("AnzoGraph not available before timeout=$(timeout)")
-end
-
-end # module
-```
-
-Typical usage (matching this repo’s environment variables):
-
-```julia
-using .AnzoGraphReady
-
-sparql_host = get(ENV, "SPARQL_HOST", "http://localhost:8080")
-endpoint = "$(sparql_host)/sparql"
-user = get(ENV, "SPARQL_USER", "admin")
-pass = get(ENV, "SPARQL_PASS", "Passw0rd1")
-
-auth = AnzoGraphReady.basic_auth_header(user, pass)
-AnzoGraphReady.wait_for_anzograph(endpoint, auth; timeout=Minute(5))
-
-# Now it is safe to LOAD / query.
-```
-
---
-
-## 7) Optional: waiting for “data is ready” after `LOAD`
-
-Some systems accept `LOAD` but need time before results show up reliably (indexing / transaction visibility).
-If you run into that in your other project, add a second gate after `LOAD`, for example:
-
-1) load, then
-2) poll a query that must be true after load (e.g., “triple count > 0”, or a known IRI exists).
-
-Example “post-load gate”:
-
-```julia
-post_load_query = """
-SELECT (COUNT(*) AS ?n)
-WHERE { ?s ?p ?o }
-"""
-
-res = AnzoGraphReady.sparql_query(endpoint, auth, post_load_query; retries=1)
-# Parse `?n` out of bindings and require it to be > 0; retry until it is.
-```
-
-(This repo does not currently enforce “non-empty”; it only enforces “SPARQL is working”.)
-
---
-
-## 8) Practical checklist when transferring to another project
-
- Make readiness checks hit the **real SPARQL POST** path you will use in production.
- Require a **valid JSON parse**, not just “port open”.
- Add **per-request timeouts**, so a single hung request cannot hang the whole pipeline.
- Prefer **time-based overall timeout** for predictable behavior in CI.
- Keep the query **cheap** (`ASK` or `LIMIT 1/3`).
- If you use Docker Compose healthchecks, consider also using `depends_on: condition: service_healthy`, but still keep the in-app wait as a safety net (it’s closer to the real contract your code needs).
-
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -5,6 +5,17 @@ function sleep(ms: number): Promise<void> {
  return new Promise((r) => setTimeout(r, ms));
 }

+type GraphMeta = {
+  backend?: string;
+  ttl_path?: string | null;
+  sparql_endpoint?: string | null;
+  include_bnodes?: boolean;
+  node_limit?: number;
+  edge_limit?: number;
+  nodes?: number;
+  edges?: number;
+};
+
 export default function App() {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const rendererRef = useRef<Renderer | null>(null);
@@ -18,12 +29,15 @@ export default function App() {
    ptSize: 0,
  });
  const [error, setError] = useState("");
-  const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number } | null>(null);
+  const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number; label?: string; iri?: string } | null>(null);
  const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set());
  const [backendStats, setBackendStats] = useState<{ nodes: number; edges: number; backend?: string } | null>(null);
+  const graphMetaRef = useRef<GraphMeta | null>(null);
+  const neighborsReqIdRef = useRef(0);

  // Store mouse position in a ref so it can be accessed in render loop without re-renders
  const mousePos = useRef({ x: 0, y: 0 });
+  const nodesRef = useRef<any[]>([]);

  useEffect(() => {
    const canvas = canvasRef.current;
@@ -70,6 +84,9 @@ export default function App() {
        const meta = graph.meta || null;
        const count = nodes.length;

+        nodesRef.current = nodes;
+        graphMetaRef.current = meta && typeof meta === "object" ? (meta as GraphMeta) : null;
+
        // Build positions from backend-provided node coordinates.
        setStatus("Preparing buffers…");
        const xs = new Float32Array(count);
@@ -196,9 +213,18 @@ export default function App() {
      frameCount++;

      // Find hovered node using quadtree
-      const node = renderer.findNodeAt(mousePos.current.x, mousePos.current.y);
-      if (node) {
-        setHoveredNode({ ...node, screenX: mousePos.current.x, screenY: mousePos.current.y });
+      const hit = renderer.findNodeIndexAt(mousePos.current.x, mousePos.current.y);
+      if (hit) {
+        const origIdx = renderer.sortedIndexToOriginalIndex(hit.index);
+        const meta = origIdx === null ? null : nodesRef.current[origIdx];
+        setHoveredNode({
+          x: hit.x,
+          y: hit.y,
+          screenX: mousePos.current.x,
+          screenY: mousePos.current.y,
+          label: meta && typeof meta.label === "string" ? meta.label : undefined,
+          iri: meta && typeof meta.iri === "string" ? meta.iri : undefined,
+        });
      } else {
        setHoveredNode(null);
      }
@@ -234,9 +260,72 @@ export default function App() {

  // Sync selection state to renderer
  useEffect(() => {
-    if (rendererRef.current) {
-      rendererRef.current.updateSelection(selectedNodes);
+    const renderer = rendererRef.current;
+    if (!renderer) return;
+
+    // Optimistically reflect selection immediately; neighbors will be filled in by backend.
+    renderer.updateSelection(selectedNodes, new Set());
+
+    // Invalidate any in-flight neighbor request for the previous selection.
+    const reqId = ++neighborsReqIdRef.current;
+
+    // Convert selected sorted indices to backend node IDs (graph-export dense IDs).
+    const selectedIds: number[] = [];
+    for (const sortedIdx of selectedNodes) {
+      const origIdx = renderer.sortedIndexToOriginalIndex(sortedIdx);
+      if (origIdx === null) continue;
+      const nodeId = nodesRef.current?.[origIdx]?.id;
+      if (typeof nodeId === "number") selectedIds.push(nodeId);
    }
+
+    if (selectedIds.length === 0) {
+      return;
+    }
+
+    // Always send the full current selection list; backend returns the merged neighbor set.
+    const ctrl = new AbortController();
+
+    (async () => {
+      try {
+        const meta = graphMetaRef.current;
+        const body = {
+          selected_ids: selectedIds,
+          node_limit: typeof meta?.node_limit === "number" ? meta.node_limit : undefined,
+          edge_limit: typeof meta?.edge_limit === "number" ? meta.edge_limit : undefined,
+        };
+
+        const res = await fetch("/api/neighbors", {
+          method: "POST",
+          headers: { "content-type": "application/json" },
+          body: JSON.stringify(body),
+          signal: ctrl.signal,
+        });
+        if (!res.ok) throw new Error(`POST /api/neighbors failed: ${res.status}`);
+        const data = await res.json();
+        if (ctrl.signal.aborted) return;
+        if (reqId !== neighborsReqIdRef.current) return;
+
+        const neighborIds: unknown = data?.neighbor_ids;
+        const neighborSorted = new Set<number>();
+        if (Array.isArray(neighborIds)) {
+          for (const id of neighborIds) {
+            if (typeof id !== "number") continue;
+            const sorted = renderer.vertexIdToSortedIndexOrNull(id);
+            if (sorted === null) continue;
+            if (!selectedNodes.has(sorted)) neighborSorted.add(sorted);
+          }
+        }
+
+        renderer.updateSelection(selectedNodes, neighborSorted);
+      } catch (e) {
+        if (ctrl.signal.aborted) return;
+        console.warn(e);
+        // Keep the UI usable even if neighbors fail to load.
+        renderer.updateSelection(selectedNodes, new Set());
+      }
+    })();
+
+    return () => ctrl.abort();
  }, [selectedNodes]);

  return (
@@ -350,7 +439,12 @@ export default function App() {
                boxShadow: "0 2px 8px rgba(0,0,0,0.5)",
              }}
            >
-              ({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)})
+              <div style={{ color: "#0ff" }}>
+                {hoveredNode.label || hoveredNode.iri || "(unknown)"}
+              </div>
+              <div style={{ color: "#688" }}>
+                ({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)})
+              </div>
            </div>
          )}
        </>
--- a/frontend/src/renderer.ts
+++ b/frontend/src/renderer.ts
@@ -80,9 +80,11 @@ export class Renderer {
  // Data
  private leaves: Leaf[] = [];
  private sorted: Float32Array = new Float32Array(0);
+  // sortedToOriginal[sortedIdx] = originalIdx (original ordering matches the input arrays)
+  private sortedToOriginal: Uint32Array = new Uint32Array(0);
+  private vertexIdToSortedIndex: Map<number, number> = new Map();
  private nodeCount = 0;
  private edgeCount = 0;
-  private neighborMap: Map<number, number[]> = new Map();
  private leafEdgeStarts: Uint32Array = new Uint32Array(0);
  private leafEdgeCounts: Uint32Array = new Uint32Array(0);
  private maxPtSize = 256;
@@ -202,6 +204,7 @@ export class Renderer {
    const { sorted, leaves, order } = buildSpatialIndex(xs, ys);
    this.leaves = leaves;
    this.sorted = sorted;
+    this.sortedToOriginal = order;

    // Pre-allocate arrays for render loop (zero-allocation rendering)
    this.visibleLeafIndices = new Uint32Array(leaves.length);
@@ -226,6 +229,13 @@ export class Renderer {
      originalToSorted[order[i]] = i;
    }

+    // Build vertex ID → sorted index mapping (used by backend-driven neighbor highlighting)
+    const vertexIdToSortedIndex = new Map<number, number>();
+    for (let i = 0; i < count; i++) {
+      vertexIdToSortedIndex.set(vertexIds[i], originalToSorted[i]);
+    }
+    this.vertexIdToSortedIndex = vertexIdToSortedIndex;
+
    // Remap edges from vertex IDs to sorted indices
    const lineIndices = new Uint32Array(edgeCount * 2);
    let validEdges = 0;
@@ -241,18 +251,6 @@ export class Renderer {
    }
    this.edgeCount = validEdges;

-    // Build per-node neighbor list from edges for selection queries
-    const neighborMap = new Map<number, number[]>();
-    for (let i = 0; i < validEdges; i++) {
-      const src = lineIndices[i * 2];
-      const dst = lineIndices[i * 2 + 1];
-      if (!neighborMap.has(src)) neighborMap.set(src, []);
-      neighborMap.get(src)!.push(dst);
-      if (!neighborMap.has(dst)) neighborMap.set(dst, []);
-      neighborMap.get(dst)!.push(src);
-    }
-    this.neighborMap = neighborMap;
-
    // Build per-leaf edge index for efficient visible-only edge drawing
    // Find which leaf each sorted index belongs to
    const nodeToLeaf = new Uint32Array(count);
@@ -331,6 +329,28 @@ export class Renderer {
    return this.nodeCount;
  }

+  /**
+   * Map a sorted buffer index (what findNodeIndexAt returns) back to the original
+   * index in the input arrays used to initialize the renderer; null if out of range.
+   */
+  sortedIndexToOriginalIndex(sortedIndex: number): number | null {
+    if (
+      sortedIndex < 0 ||
+      sortedIndex >= this.sortedToOriginal.length
+    ) {
+      return null;
+    }
+    return this.sortedToOriginal[sortedIndex];
+  }
+
+  /**
+   * Convert a backend node ID (node.id from /api/graph) to the renderer's sorted index; null if the ID is unknown.
+   */
+  vertexIdToSortedIndexOrNull(vertexId: number): number | null {
+    const idx = this.vertexIdToSortedIndex.get(vertexId);
+    return typeof idx === "number" ? idx : null;
+  }
+
  /**
   * Convert screen coordinates (CSS pixels) to world coordinates.
   */
@@ -412,10 +432,10 @@ export class Renderer {

  /**
   * Update the selection buffer with the given set of node indices.
-   * Also computes neighbors of selected nodes.
-   * Call this whenever React's selection state changes.
+   * Neighbor indices are supplied by the caller (from the backend's SPARQL neighbor query) and uploaded to a separate index buffer.
+   * Call this whenever selection or backend neighbor results change.
   */
-  updateSelection(selectedIndices: Set<number>): void {
+  updateSelection(selectedIndices: Set<number>, neighborIndices: Set<number> = new Set()): void {
    const gl = this.gl;

    // Upload selected indices
@@ -425,23 +445,11 @@ export class Renderer {
    gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, indices, gl.DYNAMIC_DRAW);
    gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);

-    // Compute neighbors of selected nodes (excluding already selected)
-    const neighborSet = new Set<number>();
-    for (const nodeIdx of selectedIndices) {
-      const nodeNeighbors = this.neighborMap.get(nodeIdx);
-      if (!nodeNeighbors) continue;
-      for (const n of nodeNeighbors) {
-        if (!selectedIndices.has(n)) {
-          neighborSet.add(n);
-        }
-      }
-    }
-
    // Upload neighbor indices
-    const neighborIndices = new Uint32Array(neighborSet);
-    this.neighborCount = neighborIndices.length;
+    const neighborIndexArray = new Uint32Array(neighborIndices);
+    this.neighborCount = neighborIndexArray.length;
    gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, this.neighborIbo);
-    gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndices, gl.DYNAMIC_DRAW);
+    gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndexArray, gl.DYNAMIC_DRAW);
    gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);
  }