Import Solver + neighbors via sparql query
This commit is contained in:
30
.env.example
30
.env.example
@@ -1,30 +0,0 @@
|
|||||||
# Choose which engine executes SPARQL:
|
|
||||||
# - rdflib: parse TTL locally and query in-memory
|
|
||||||
# - anzograph: query AnzoGraph over HTTP (optionally LOAD the TTL on startup)
|
|
||||||
GRAPH_BACKEND=rdflib
|
|
||||||
|
|
||||||
# Backend (rdflib) file location inside the container.
|
|
||||||
# The TTL file must exist within the mounted ./data folder if you keep the default volume mount.
|
|
||||||
TTL_PATH=/data/o3po.ttl
|
|
||||||
|
|
||||||
# Backend behavior
|
|
||||||
INCLUDE_BNODES=false
|
|
||||||
# MAX_TRIPLES=1000000
|
|
||||||
|
|
||||||
# AnzoGraph / SPARQL endpoint settings (used when GRAPH_BACKEND=anzograph)
|
|
||||||
SPARQL_HOST=http://anzograph:8080
|
|
||||||
# SPARQL_ENDPOINT=http://anzograph:8080/sparql
|
|
||||||
SPARQL_USER=admin
|
|
||||||
SPARQL_PASS=Passw0rd1
|
|
||||||
|
|
||||||
# File URI as seen by the AnzoGraph container (used by SPARQL `LOAD`)
|
|
||||||
SPARQL_DATA_FILE=file:///opt/shared-files/o3po.ttl
|
|
||||||
# SPARQL_GRAPH_IRI=http://example.org/graph
|
|
||||||
|
|
||||||
# Startup behavior for AnzoGraph mode
|
|
||||||
SPARQL_LOAD_ON_START=false
|
|
||||||
SPARQL_CLEAR_ON_START=false
|
|
||||||
|
|
||||||
# Dev UX
|
|
||||||
CORS_ORIGINS=http://localhost:5173
|
|
||||||
VITE_BACKEND_URL=http://backend:8000
|
|
||||||
@@ -32,6 +32,11 @@ Callers (frontend or other clients) interact with a single API surface (`/api/*`
|
|||||||
- Used by `/api/nodes`, `/api/edges`, and `rdflib`-mode `/api/stats`.
|
- Used by `/api/nodes`, `/api/edges`, and `rdflib`-mode `/api/stats`.
|
||||||
- `pipelines/graph_snapshot.py`
|
- `pipelines/graph_snapshot.py`
|
||||||
- Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL (works for both RDFLib and AnzoGraph).
|
- Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL (works for both RDFLib and AnzoGraph).
|
||||||
|
- `pipelines/layout_dag_radial.py`
|
||||||
|
- DAG layout helpers used by `pipelines/graph_snapshot.py`:
|
||||||
|
- cycle detection
|
||||||
|
- level-synchronous Kahn layering
|
||||||
|
- radial (ring-per-layer) positioning.
|
||||||
- `pipelines/snapshot_service.py`
|
- `pipelines/snapshot_service.py`
|
||||||
- Snapshot cache layer used by `/api/graph` and `/api/stats` so the backend doesn't run expensive SPARQL twice.
|
- Snapshot cache layer used by `/api/graph` and `/api/stats` so the backend doesn't run expensive SPARQL twice.
|
||||||
- `pipelines/subclass_labels.py`
|
- `pipelines/subclass_labels.py`
|
||||||
@@ -64,6 +69,14 @@ RDFLib mode:
|
|||||||
- `TTL_PATH`: path inside the backend container to a `.ttl` file (example: `/data/o3po.ttl`)
|
- `TTL_PATH`: path inside the backend container to a `.ttl` file (example: `/data/o3po.ttl`)
|
||||||
- `MAX_TRIPLES`: optional int; if set, stops parsing after this many triples
|
- `MAX_TRIPLES`: optional int; if set, stops parsing after this many triples
|
||||||
|
|
||||||
|
Optional import-combining step (runs before the SPARQL engine starts):
|
||||||
|
|
||||||
|
- `COMBINE_OWL_IMPORTS_ON_START`: `true` to recursively load `TTL_PATH` (or `COMBINE_ENTRY_LOCATION`) plus `owl:imports` and write a combined TTL file.
|
||||||
|
- `COMBINE_ENTRY_LOCATION`: optional override for the entry file/URL to load (defaults to `TTL_PATH`)
|
||||||
|
- `COMBINE_OUTPUT_LOCATION`: optional explicit output path (defaults to `${dirname(entry)}/${COMBINE_OUTPUT_NAME}`)
|
||||||
|
- `COMBINE_OUTPUT_NAME`: output filename when `COMBINE_OUTPUT_LOCATION` is not set (default: `combined_ontology.ttl`)
|
||||||
|
- `COMBINE_FORCE`: `true` to rebuild even if the output file already exists
|
||||||
|
|
||||||
AnzoGraph mode:
|
AnzoGraph mode:
|
||||||
|
|
||||||
- `SPARQL_HOST`: base host (example: `http://anzograph:8080`)
|
- `SPARQL_HOST`: base host (example: `http://anzograph:8080`)
|
||||||
@@ -129,8 +142,8 @@ Returned in `nodes[]` (dense IDs; suitable for indexing in typed arrays):
|
|||||||
- `id`: integer dense node ID used in edges
|
- `id`: integer dense node ID used in edges
|
||||||
- `termType`: `"uri"` or `"bnode"`
|
- `termType`: `"uri"` or `"bnode"`
|
||||||
- `iri`: URI string; blank nodes are normalized to `_:<id>`
|
- `iri`: URI string; blank nodes are normalized to `_:<id>`
|
||||||
- `label`: currently `null` in `/api/graph` snapshots (pipelines can be used to populate later)
|
- `label`: `rdfs:label` when available (best-effort; prefers English)
|
||||||
- `x`/`y`: world-space coordinates for rendering (currently a deterministic spiral layout)
|
- `x`/`y`: world-space coordinates for rendering (currently a radial layered layout derived from `rdfs:subClassOf`)
|
||||||
|
|
||||||
### Edge
|
### Edge
|
||||||
|
|
||||||
@@ -149,11 +162,10 @@ Returned in `edges[]`:
|
|||||||
|
|
||||||
## Snapshot Query (`/api/graph`)
|
## Snapshot Query (`/api/graph`)
|
||||||
|
|
||||||
`/api/graph` uses a SPARQL query that:
|
`/api/graph` currently uses a SPARQL query that returns `rdfs:subClassOf` edges, plus `rdf:type` edges whose object is an `owl:Class`:
|
||||||
|
|
||||||
- selects triples `?s ?p ?o`
|
- selects bindings as `?s ?p ?o` (with `?p` bound to `rdfs:subClassOf` or `rdf:type`)
|
||||||
- excludes literal objects (`FILTER(!isLiteral(?o))`)
|
- excludes literal objects (`FILTER(!isLiteral(?o))`) for safety
|
||||||
- excludes `rdfs:label`, `skos:prefLabel`, and `skos:altLabel` predicates
|
|
||||||
- optionally excludes blank nodes (unless `INCLUDE_BNODES=true`)
|
- optionally excludes blank nodes (unless `INCLUDE_BNODES=true`)
|
||||||
- applies `LIMIT edge_limit`
|
- applies `LIMIT edge_limit`
|
||||||
|
|
||||||
@@ -161,6 +173,8 @@ The result bindings are mapped to dense node IDs (first-seen order) and returned
|
|||||||
|
|
||||||
`/api/graph` also returns `meta` with snapshot counts and engine info so the frontend doesn't need to call `/api/stats`.
|
`/api/graph` also returns `meta` with snapshot counts and engine info so the frontend doesn't need to call `/api/stats`.
|
||||||
|
|
||||||
|
If a cycle is detected in the returned `rdfs:subClassOf` snapshot, `/api/graph` returns HTTP 422 (layout requires a DAG).
|
||||||
|
|
||||||
## Pipelines
|
## Pipelines
|
||||||
|
|
||||||
### `pipelines/graph_snapshot.py`
|
### `pipelines/graph_snapshot.py`
|
||||||
|
|||||||
@@ -5,16 +5,25 @@ from typing import Any
|
|||||||
|
|
||||||
def edge_retrieval_query(*, edge_limit: int, include_bnodes: bool) -> str:
|
def edge_retrieval_query(*, edge_limit: int, include_bnodes: bool) -> str:
|
||||||
bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
bnode_filter = "" if include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
||||||
|
|
||||||
return f"""
|
return f"""
|
||||||
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
||||||
|
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||||
|
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
||||||
|
|
||||||
SELECT ?s ?p ?o
|
SELECT ?s ?p ?o
|
||||||
WHERE {{
|
WHERE {{
|
||||||
?s ?p ?o .
|
{{
|
||||||
|
VALUES ?p {{ rdf:type }}
|
||||||
|
?s ?p ?o .
|
||||||
|
?o rdf:type owl:Class .
|
||||||
|
}}
|
||||||
|
UNION
|
||||||
|
{{
|
||||||
|
VALUES ?p {{ rdfs:subClassOf }}
|
||||||
|
?s ?p ?o .
|
||||||
|
}}
|
||||||
FILTER(!isLiteral(?o))
|
FILTER(!isLiteral(?o))
|
||||||
FILTER(?p NOT IN (
|
|
||||||
<http://www.w3.org/2000/01/rdf-schema#label>,
|
|
||||||
<http://www.w3.org/2004/02/skos/core#prefLabel>,
|
|
||||||
<http://www.w3.org/2004/02/skos/core#altLabel>
|
|
||||||
))
|
|
||||||
{bnode_filter}
|
{bnode_filter}
|
||||||
}}
|
}}
|
||||||
LIMIT {edge_limit}
|
LIMIT {edge_limit}
|
||||||
@@ -91,4 +100,3 @@ def graph_from_sparql_bindings(
|
|||||||
]
|
]
|
||||||
|
|
||||||
return out_nodes, out_edges
|
return out_nodes, out_edges
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,29 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException, Query
|
from fastapi import FastAPI, HTTPException, Query
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
from .models import (
|
||||||
|
EdgesResponse,
|
||||||
|
GraphResponse,
|
||||||
|
NeighborsRequest,
|
||||||
|
NeighborsResponse,
|
||||||
|
NodesResponse,
|
||||||
|
SparqlQueryRequest,
|
||||||
|
StatsResponse,
|
||||||
|
)
|
||||||
|
from .pipelines.layout_dag_radial import CycleError
|
||||||
|
from .pipelines.owl_imports_combiner import (
|
||||||
|
build_combined_graph,
|
||||||
|
output_location_to_path,
|
||||||
|
resolve_output_location,
|
||||||
|
serialize_graph_to_ttl,
|
||||||
|
)
|
||||||
|
from .pipelines.selection_neighbors import fetch_neighbor_ids_for_selection
|
||||||
from .pipelines.snapshot_service import GraphSnapshotService
|
from .pipelines.snapshot_service import GraphSnapshotService
|
||||||
from .rdf_store import RDFStore
|
from .rdf_store import RDFStore
|
||||||
from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine
|
from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine
|
||||||
@@ -13,11 +31,33 @@ from .settings import Settings
|
|||||||
|
|
||||||
|
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
sparql: SparqlEngine = create_sparql_engine(settings)
|
rdflib_preloaded_graph = None
|
||||||
|
|
||||||
|
if settings.combine_owl_imports_on_start:
|
||||||
|
entry_location = settings.combine_entry_location or settings.ttl_path
|
||||||
|
output_location = resolve_output_location(
|
||||||
|
entry_location,
|
||||||
|
output_location=settings.combine_output_location,
|
||||||
|
output_name=settings.combine_output_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
output_path = output_location_to_path(output_location)
|
||||||
|
if output_path.exists() and not settings.combine_force:
|
||||||
|
logger.info("Skipping combine step (output exists): %s", output_location)
|
||||||
|
else:
|
||||||
|
rdflib_preloaded_graph = await asyncio.to_thread(build_combined_graph, entry_location)
|
||||||
|
logger.info("Finished combining imports; serializing to: %s", output_location)
|
||||||
|
await asyncio.to_thread(serialize_graph_to_ttl, rdflib_preloaded_graph, output_location)
|
||||||
|
|
||||||
|
if settings.graph_backend == "rdflib":
|
||||||
|
settings.ttl_path = str(output_path)
|
||||||
|
|
||||||
|
sparql: SparqlEngine = create_sparql_engine(settings, rdflib_graph=rdflib_preloaded_graph)
|
||||||
await sparql.startup()
|
await sparql.startup()
|
||||||
app.state.sparql = sparql
|
app.state.sparql = sparql
|
||||||
app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings)
|
app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings)
|
||||||
@@ -62,7 +102,10 @@ def health() -> dict[str, str]:
|
|||||||
async def stats() -> StatsResponse:
|
async def stats() -> StatsResponse:
|
||||||
# Stats reflect exactly what we send to the frontend (/api/graph), not global graph size.
|
# Stats reflect exactly what we send to the frontend (/api/graph), not global graph size.
|
||||||
svc: GraphSnapshotService = app.state.snapshot_service
|
svc: GraphSnapshotService = app.state.snapshot_service
|
||||||
snap = await svc.get(node_limit=50_000, edge_limit=100_000)
|
try:
|
||||||
|
snap = await svc.get(node_limit=50_000, edge_limit=100_000)
|
||||||
|
except CycleError as e:
|
||||||
|
raise HTTPException(status_code=422, detail=str(e)) from None
|
||||||
meta = snap.meta
|
meta = snap.meta
|
||||||
return StatsResponse(
|
return StatsResponse(
|
||||||
backend=meta.backend if meta else app.state.sparql.name,
|
backend=meta.backend if meta else app.state.sparql.name,
|
||||||
@@ -81,6 +124,20 @@ async def sparql_query(req: SparqlQueryRequest) -> dict:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/neighbors", response_model=NeighborsResponse)
async def neighbors(req: NeighborsRequest) -> NeighborsResponse:
    """
    Return the snapshot node ids that are direct neighbors of the selected nodes.

    The snapshot is obtained from the snapshot service with the limits given in
    the request, then the neighbor lookup runs against the SPARQL engine.

    Raises:
        HTTPException(422): the snapshot could not be built because a cycle was
            detected (same contract as the `/api/graph` and `/api/stats` handlers).
    """
    svc: GraphSnapshotService = app.state.snapshot_service
    try:
        snap = await svc.get(node_limit=req.node_limit, edge_limit=req.edge_limit)
    except CycleError as e:
        # Building the snapshot requires a DAG; report it as a client-visible 422
        # instead of letting it surface as an unhandled 500.
        raise HTTPException(status_code=422, detail=str(e)) from None

    sparql: SparqlEngine = app.state.sparql
    neighbor_ids = await fetch_neighbor_ids_for_selection(
        sparql,
        snapshot=snap,
        selected_ids=req.selected_ids,
        include_bnodes=settings.include_bnodes,
    )
    return NeighborsResponse(selected_ids=req.selected_ids, neighbor_ids=neighbor_ids)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/nodes", response_model=NodesResponse)
|
@app.get("/api/nodes", response_model=NodesResponse)
|
||||||
def nodes(
|
def nodes(
|
||||||
limit: int = Query(default=10_000, ge=1, le=200_000),
|
limit: int = Query(default=10_000, ge=1, le=200_000),
|
||||||
@@ -109,4 +166,7 @@ async def graph(
|
|||||||
edge_limit: int = Query(default=100_000, ge=1, le=500_000),
|
edge_limit: int = Query(default=100_000, ge=1, le=500_000),
|
||||||
) -> GraphResponse:
|
) -> GraphResponse:
|
||||||
svc: GraphSnapshotService = app.state.snapshot_service
|
svc: GraphSnapshotService = app.state.snapshot_service
|
||||||
return await svc.get(node_limit=node_limit, edge_limit=edge_limit)
|
try:
|
||||||
|
return await svc.get(node_limit=node_limit, edge_limit=edge_limit)
|
||||||
|
except CycleError as e:
|
||||||
|
raise HTTPException(status_code=422, detail=str(e)) from None
|
||||||
|
|||||||
@@ -56,3 +56,14 @@ class GraphResponse(BaseModel):
|
|||||||
|
|
||||||
class SparqlQueryRequest(BaseModel):
|
class SparqlQueryRequest(BaseModel):
|
||||||
query: str
|
query: str
|
||||||
|
|
||||||
|
|
||||||
|
class NeighborsRequest(BaseModel):
    """Request body for `POST /api/neighbors`."""

    # Snapshot node ids whose neighbors are requested.
    selected_ids: list[int]
    # Limits forwarded to the snapshot service when the handler loads the snapshot.
    node_limit: int = 50_000
    edge_limit: int = 100_000
|
||||||
|
|
||||||
|
|
||||||
|
class NeighborsResponse(BaseModel):
    """Response body for `POST /api/neighbors`."""

    # The ids from the request, echoed back unchanged by the handler.
    selected_ids: list[int]
    # Snapshot node ids found to be neighbors of the selection.
    neighbor_ids: list[int]
|
||||||
|
|||||||
@@ -1,10 +1,64 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
|
from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
|
||||||
from ..models import GraphResponse
|
from ..models import GraphResponse
|
||||||
from ..sparql_engine import SparqlEngine
|
from ..sparql_engine import SparqlEngine
|
||||||
from ..settings import Settings
|
from ..settings import Settings
|
||||||
from .layout_spiral import spiral_positions
|
from .layout_dag_radial import CycleError, level_synchronous_kahn_layers, radial_positions_from_layers
|
||||||
|
|
||||||
|
|
||||||
|
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
||||||
|
|
||||||
|
|
||||||
|
def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
return (((res.get("results") or {}).get("bindings")) or [])
|
||||||
|
|
||||||
|
|
||||||
|
def _label_score(label_binding: dict[str, Any]) -> int:
|
||||||
|
# Prefer English, then no-language, then anything else.
|
||||||
|
lang = (label_binding.get("xml:lang") or "").lower()
|
||||||
|
if lang == "en":
|
||||||
|
return 3
|
||||||
|
if lang == "":
|
||||||
|
return 2
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_rdfs_labels_for_iris(
|
||||||
|
sparql: SparqlEngine,
|
||||||
|
iris: list[str],
|
||||||
|
*,
|
||||||
|
batch_size: int = 500,
|
||||||
|
) -> dict[str, str]:
|
||||||
|
best: dict[str, tuple[int, str]] = {}
|
||||||
|
|
||||||
|
for i in range(0, len(iris), batch_size):
|
||||||
|
batch = iris[i : i + batch_size]
|
||||||
|
values = " ".join(f"<{u}>" for u in batch)
|
||||||
|
q = f"""
|
||||||
|
SELECT ?s ?label
|
||||||
|
WHERE {{
|
||||||
|
VALUES ?s {{ {values} }}
|
||||||
|
?s <{RDFS_LABEL}> ?label .
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
res = await sparql.query_json(q)
|
||||||
|
for b in _bindings(res):
|
||||||
|
s = (b.get("s") or {}).get("value")
|
||||||
|
label_term = b.get("label") or {}
|
||||||
|
if not s or label_term.get("type") != "literal":
|
||||||
|
continue
|
||||||
|
label_value = label_term.get("value")
|
||||||
|
if label_value is None:
|
||||||
|
continue
|
||||||
|
score = _label_score(label_term)
|
||||||
|
prev = best.get(s)
|
||||||
|
if prev is None or score > prev[0]:
|
||||||
|
best[s] = (score, str(label_value))
|
||||||
|
|
||||||
|
return {iri: lbl for iri, (_, lbl) in best.items()}
|
||||||
|
|
||||||
|
|
||||||
async def fetch_graph_snapshot(
|
async def fetch_graph_snapshot(
|
||||||
@@ -28,11 +82,59 @@ async def fetch_graph_snapshot(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Add positions so the frontend doesn't need to run a layout.
|
# Add positions so the frontend doesn't need to run a layout.
|
||||||
xs, ys = spiral_positions(len(nodes))
|
#
|
||||||
|
# We are exporting only rdfs:subClassOf triples. In the exported edges:
|
||||||
|
# source = subclass, target = superclass
|
||||||
|
# For hierarchical layout we invert edges to:
|
||||||
|
# superclass -> subclass
|
||||||
|
hier_edges: list[tuple[int, int]] = []
|
||||||
|
for e in edges:
|
||||||
|
s = e.get("source")
|
||||||
|
t = e.get("target")
|
||||||
|
try:
|
||||||
|
sid = int(s) # subclass
|
||||||
|
tid = int(t) # superclass
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
hier_edges.append((tid, sid))
|
||||||
|
|
||||||
|
try:
|
||||||
|
layers = level_synchronous_kahn_layers(node_count=len(nodes), edges=hier_edges)
|
||||||
|
except CycleError as e:
|
||||||
|
# Add a small URI sample to aid debugging.
|
||||||
|
sample: list[str] = []
|
||||||
|
for nid in e.remaining_node_ids[:20]:
|
||||||
|
try:
|
||||||
|
sample.append(str(nodes[nid].get("iri")))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
raise CycleError(
|
||||||
|
processed=e.processed,
|
||||||
|
total=e.total,
|
||||||
|
remaining_node_ids=e.remaining_node_ids,
|
||||||
|
remaining_iri_sample=sample or None,
|
||||||
|
) from None
|
||||||
|
|
||||||
|
# Deterministic order within each ring/layer for stable layouts.
|
||||||
|
id_to_iri = [str(n.get("iri", "")) for n in nodes]
|
||||||
|
for layer in layers:
|
||||||
|
layer.sort(key=lambda nid: id_to_iri[nid])
|
||||||
|
|
||||||
|
xs, ys = radial_positions_from_layers(node_count=len(nodes), layers=layers)
|
||||||
for i, node in enumerate(nodes):
|
for i, node in enumerate(nodes):
|
||||||
node["x"] = float(xs[i])
|
node["x"] = float(xs[i])
|
||||||
node["y"] = float(ys[i])
|
node["y"] = float(ys[i])
|
||||||
|
|
||||||
|
# Attach labels for URI nodes (blank nodes remain label-less).
|
||||||
|
uri_nodes = [n for n in nodes if n.get("termType") == "uri"]
|
||||||
|
if uri_nodes:
|
||||||
|
iris = [str(n["iri"]) for n in uri_nodes if isinstance(n.get("iri"), str)]
|
||||||
|
label_by_iri = await _fetch_rdfs_labels_for_iris(sparql, iris)
|
||||||
|
for n in uri_nodes:
|
||||||
|
iri = n.get("iri")
|
||||||
|
if isinstance(iri, str) and iri in label_by_iri:
|
||||||
|
n["label"] = label_by_iri[iri]
|
||||||
|
|
||||||
meta = GraphResponse.Meta(
|
meta = GraphResponse.Meta(
|
||||||
backend=sparql.name,
|
backend=sparql.name,
|
||||||
ttl_path=settings.ttl_path if settings.graph_backend == "rdflib" else None,
|
ttl_path=settings.ttl_path if settings.graph_backend == "rdflib" else None,
|
||||||
|
|||||||
141
backend/app/pipelines/layout_dag_radial.py
Normal file
141
backend/app/pipelines/layout_dag_radial.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from collections import deque
|
||||||
|
from typing import Iterable, Sequence
|
||||||
|
|
||||||
|
|
||||||
|
class CycleError(RuntimeError):
    """
    Raised when the requested layout requires a DAG, but a cycle is detected.

    `remaining_node_ids` are the node ids that still had indegree > 0 after Kahn.
    `remaining_iri_sample`, when given, is appended to the message as example nodes.
    """

    def __init__(
        self,
        *,
        processed: int,
        total: int,
        remaining_node_ids: list[int],
        remaining_iri_sample: list[str] | None = None,
    ) -> None:
        self.processed = int(processed)
        self.total = int(total)
        self.remaining_node_ids = remaining_node_ids
        self.remaining_iri_sample = remaining_iri_sample

        parts = [f"Cycle detected in subClassOf graph (processed {self.processed}/{self.total} nodes)."]
        if remaining_iri_sample:
            parts.append(f" Example nodes: {', '.join(remaining_iri_sample)}")
        super().__init__("".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def level_synchronous_kahn_layers(
    *,
    node_count: int,
    edges: Iterable[tuple[int, int]],
) -> list[list[int]]:
    """
    Layer a directed graph with a level-synchronous variant of Kahn's algorithm.

    All nodes whose indegree is currently zero form one layer; only after the
    whole layer has been processed are the nodes it unlocked collected into the
    next layer. `edges` are directed (u -> v). Self-loops and edges with an
    endpoint outside `[0, node_count)` are ignored.

    Returns:
        Layers of node ids, from sources outward; `[]` when `node_count <= 0`.

    Raises:
        CycleError: not every node could be processed (a cycle remains).
    """
    total = int(node_count)
    if total <= 0:
        return []

    successors: list[list[int]] = [[] for _ in range(total)]
    pending = [0] * total

    for src, dst in edges:
        # Self-loops don't help layout and would trivially violate DAG-ness;
        # out-of-range endpoints cannot be indexed.
        if src == dst or not (0 <= src < total and 0 <= dst < total):
            continue
        successors[src].append(dst)
        pending[dst] += 1

    frontier = [node for node, count in enumerate(pending) if count == 0]
    layers: list[list[int]] = []
    done = 0

    while frontier:
        layers.append(frontier)
        unlocked: list[int] = []
        for src in frontier:
            done += 1
            for dst in successors[src]:
                pending[dst] -= 1
                if pending[dst] == 0:
                    unlocked.append(dst)
        frontier = unlocked

    if done != total:
        stuck = [node for node, count in enumerate(pending) if count > 0]
        raise CycleError(processed=done, total=total, remaining_node_ids=stuck)

    return layers
|
||||||
|
|
||||||
|
|
||||||
|
def radial_positions_from_layers(
    *,
    node_count: int,
    layers: Sequence[Sequence[int]],
    max_r: float = 5000.0,
) -> tuple[list[float], list[float]]:
    """
    Assign node positions in concentric rings (one ring per layer).

    - radius increases with layer index: `(index + 1) / (layer_count + 1) * max_r`
    - nodes within a layer are placed evenly by angle
    - each ring gets a "golden-angle" rotation to reduce spoke artifacts

    Node ids outside `[0, node_count)` are skipped; nodes that appear in no
    layer stay at (0.0, 0.0).

    Returns:
        `(xs, ys)` lists indexed by node id; two empty lists when `node_count <= 0`.
    """
    total = int(node_count)
    if total <= 0:
        return ([], [])

    xs = [0.0] * total
    ys = [0.0] * total
    if not layers:
        return (xs, ys)

    full_turn = 2.0 * math.pi
    golden_angle = math.pi * (3.0 - math.sqrt(5.0))
    ring_divisor = float(len(layers) + 1)

    for ring_index, ring in enumerate(layers):
        size = len(ring)
        if size <= 0:
            continue

        # Keep everything within ~[-max_r, max_r].
        radius = ((ring_index + 1) / ring_divisor) * max_r
        # Deterministic per-ring rotation so spokes of adjacent rings do not align.
        rotation = (ring_index * golden_angle) % full_turn
        angular_step = full_turn / float(size)

        # A single-node ring needs no special case: slot 0 sits exactly at `rotation`.
        for slot, raw_id in enumerate(ring):
            node_id = int(raw_id)
            if not (0 <= node_id < total):
                continue
            angle = rotation + angular_step * float(slot)
            xs[node_id] = radius * math.cos(angle)
            ys[node_id] = radius * math.sin(angle)

    return (xs, ys)
|
||||||
96
backend/app/pipelines/owl_imports_combiner.py
Normal file
96
backend/app/pipelines/owl_imports_combiner.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.parse import unquote, urlparse
|
||||||
|
|
||||||
|
from rdflib import Graph
|
||||||
|
from rdflib.namespace import OWL
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_http_url(location: str) -> bool:
|
||||||
|
scheme = urlparse(location).scheme.lower()
|
||||||
|
return scheme in {"http", "https"}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_file_uri(location: str) -> bool:
|
||||||
|
return urlparse(location).scheme.lower() == "file"
|
||||||
|
|
||||||
|
|
||||||
|
def _file_uri_to_path(location: str) -> Path:
|
||||||
|
u = urlparse(location)
|
||||||
|
if u.scheme.lower() != "file":
|
||||||
|
raise ValueError(f"Not a file:// URI: {location!r}")
|
||||||
|
return Path(unquote(u.path))
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_output_location(
    entry_location: str,
    *,
    output_location: str | None,
    output_name: str,
) -> str:
    """
    Decide where the combined ontology should be written.

    An explicit (non-empty) `output_location` is returned unchanged. Otherwise
    the result is `output_name` placed next to the entry file, where the entry
    may be a plain path or a `file://` URI.

    Raises:
        ValueError: no explicit output was given and the entry is an http(s) URL,
            so there is no local directory to derive the output from.
    """
    if output_location:
        return output_location

    if _is_http_url(entry_location):
        raise ValueError(
            "COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
        )

    if _is_file_uri(entry_location):
        entry_path = _file_uri_to_path(entry_location)
    else:
        entry_path = Path(entry_location)
    return str(entry_path.parent / output_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _output_destination_to_path(output_location: str) -> Path:
    """
    Turn an output location (plain path or `file://` URI) into a local `Path`.

    Raises:
        ValueError: the location is an http(s) URL, which cannot be written to.
    """
    if _is_file_uri(output_location):
        return _file_uri_to_path(output_location)
    if not _is_http_url(output_location):
        return Path(output_location)
    raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
|
||||||
|
|
||||||
|
|
||||||
|
def output_location_to_path(output_location: str) -> Path:
    """Public wrapper around `_output_destination_to_path`: returns the local `Path` for an output location."""
    return _output_destination_to_path(output_location)
|
||||||
|
|
||||||
|
|
||||||
|
def build_combined_graph(entry_location: str) -> Graph:
    """
    Load an RDF document (file path, file:// URI, or http(s) URL) together with
    everything reachable through `owl:imports` into a single in-memory graph.

    Each location is loaded at most once. A location that fails to parse is
    logged as a warning and skipped (best effort); its imports are not followed.
    """
    merged = Graph()
    seen: set[str] = set()

    # Depth-first traversal with an explicit stack of iterators: each entry is
    # the list of imports discovered right after one document was parsed.
    pending = [iter([entry_location])]
    while pending:
        location = next(pending[-1], None)
        if location is None:
            pending.pop()
            continue
        if location in seen:
            continue
        seen.add(location)

        logger.info("Loading ontology: %s", location)
        try:
            merged.parse(location=location)
        except Exception as exc:
            logger.warning("Failed to load %s (%s)", location, exc)
            continue

        # Scans the whole merged graph; already-seen locations are filtered above.
        imported = [str(o) for _, _, o in merged.triples((None, OWL.imports, None))]
        pending.append(iter(imported))

    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
    """
    Write `graph` as Turtle to `output_location` (plain path or file:// URI).

    The parent directory is created if needed. The graph is first serialized to
    a sibling `<name><suffix>.tmp` file, which is then moved over the final path
    with `os.replace`, so readers never observe a partially written output.

    If serialization or the final move fails, the temporary file is removed
    before the error propagates, so failed runs do not leave `.tmp` files behind.
    """
    output_path = _output_destination_to_path(output_location)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
    try:
        graph.serialize(destination=str(tmp_path), format="turtle")
        os.replace(str(tmp_path), str(output_path))
    finally:
        # After a successful replace the temporary path no longer exists, so this
        # is a no-op; after a failure it cleans up the partial file.
        tmp_path.unlink(missing_ok=True)
|
||||||
137
backend/app/pipelines/selection_neighbors.py
Normal file
137
backend/app/pipelines/selection_neighbors.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Iterable
|
||||||
|
|
||||||
|
from ..models import GraphResponse, Node
|
||||||
|
from ..sparql_engine import SparqlEngine
|
||||||
|
|
||||||
|
|
||||||
|
def _values_term(node: Node) -> str | None:
|
||||||
|
iri = node.iri
|
||||||
|
if node.termType == "uri":
|
||||||
|
return f"<{iri}>"
|
||||||
|
if node.termType == "bnode":
|
||||||
|
if iri.startswith("_:"):
|
||||||
|
return iri
|
||||||
|
return f"_:{iri}"
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def selection_neighbors_query(*, selected_nodes: Iterable[Node], include_bnodes: bool) -> str:
    """
    Build the SPARQL query that finds direct neighbors of the selected nodes.

    Neighbors are nodes connected to a selected node by `rdf:type` (where the
    object is an `owl:Class`) or by `rdfs:subClassOf`, in either direction.
    Literals and the selected node itself are excluded; blank-node neighbors are
    excluded unless `include_bnodes` is true.

    Nodes for which `_values_term` yields no term are left out. If nothing
    remains, a query that matches nothing is returned.
    """
    candidate_terms = (_values_term(node) for node in selected_nodes)
    values_terms = [term for term in candidate_terms if term is not None]

    if not values_terms:
        # Caller should avoid running this query when selection is empty, but keep this safe.
        return "SELECT ?nbr WHERE { FILTER(false) }"

    values = " ".join(values_terms)
    bnode_filter = "FILTER(!isBlank(?nbr))" if not include_bnodes else ""

    # Four UNION branches: the two edge kinds, each followed in both directions.
    return f"""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    SELECT DISTINCT ?nbr
    WHERE {{
    VALUES ?sel {{ {values} }}
    {{
    ?sel rdf:type ?o .
    ?o rdf:type owl:Class .
    BIND(?o AS ?nbr)
    }}
    UNION
    {{
    ?s rdf:type ?sel .
    ?sel rdf:type owl:Class .
    BIND(?s AS ?nbr)
    }}
    UNION
    {{
    ?sel rdfs:subClassOf ?o .
    BIND(?o AS ?nbr)
    }}
    UNION
    {{
    ?s rdfs:subClassOf ?sel .
    BIND(?s AS ?nbr)
    }}
    FILTER(!isLiteral(?nbr))
    FILTER(?nbr != ?sel)
    {bnode_filter}
    }}
    """
|
||||||
|
|
||||||
|
|
||||||
|
def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
return (((res.get("results") or {}).get("bindings")) or [])
|
||||||
|
|
||||||
|
|
||||||
|
def _term_key(term: dict[str, Any], *, include_bnodes: bool) -> tuple[str, str] | None:
|
||||||
|
t = term.get("type")
|
||||||
|
v = term.get("value")
|
||||||
|
if not t or v is None:
|
||||||
|
return None
|
||||||
|
if t == "literal":
|
||||||
|
return None
|
||||||
|
if t == "bnode":
|
||||||
|
if not include_bnodes:
|
||||||
|
return None
|
||||||
|
return ("bnode", f"_:{v}")
|
||||||
|
return ("uri", str(v))
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_neighbor_ids_for_selection(
    sparql: SparqlEngine,
    *,
    snapshot: GraphResponse,
    selected_ids: list[int],
    include_bnodes: bool,
) -> list[int]:
    """Resolve the snapshot node ids that neighbor the selected nodes.

    The selected ids are mapped to snapshot nodes, a neighbor query is built
    from them and run through ``sparql.query_json``, and each returned ``?nbr``
    term is mapped back to a snapshot node id.

    Args:
        sparql: Engine used to execute the neighbor query.
        snapshot: Graph snapshot whose nodes define the id <-> term mapping.
        selected_ids: Snapshot node ids that are currently selected. Entries
            that are not ints, are not in the snapshot, or are blank nodes
            while ``include_bnodes`` is false are ignored.
        include_bnodes: Whether blank nodes may be selected or returned.

    Returns:
        Sorted neighbor node ids, excluding the selected ids and any term that
        has no node in the snapshot. Empty when no selected id is usable.
    """
    nodes_by_id: dict[int, Node] = {}
    for node in snapshot.nodes:
        nodes_by_id[node.id] = node

    chosen: list[Node] = []
    chosen_ids: set[int] = set()
    for candidate in selected_ids:
        node = nodes_by_id.get(candidate) if isinstance(candidate, int) else None
        if node is None:
            continue
        if node.termType == "bnode" and not include_bnodes:
            continue
        chosen.append(node)
        chosen_ids.add(candidate)

    # Nothing usable was selected, so there is no query to run.
    if not chosen:
        return []

    # Reverse lookup from (termType, iri) to the snapshot node id.
    id_by_term: dict[tuple[str, str], int] = {
        (node.termType, node.iri): node.id for node in snapshot.nodes
    }

    query = selection_neighbors_query(selected_nodes=chosen, include_bnodes=include_bnodes)
    response = await sparql.query_json(query)

    found: set[int] = set()
    for row in _bindings(response):
        key = _term_key(row.get("nbr") or {}, include_bnodes=include_bnodes)
        match = None if key is None else id_by_term.get(key)
        if match is None or match in chosen_ids:
            continue
        found.add(match)

    # Stable ordering for consistent frontend behavior.
    return sorted(found)
|
||||||
@@ -16,6 +16,13 @@ class Settings(BaseSettings):
|
|||||||
include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES")
|
include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES")
|
||||||
max_triples: int | None = Field(default=None, alias="MAX_TRIPLES")
|
max_triples: int | None = Field(default=None, alias="MAX_TRIPLES")
|
||||||
|
|
||||||
|
# Optional: Combine owl:imports into a single TTL file on backend startup.
|
||||||
|
combine_owl_imports_on_start: bool = Field(default=False, alias="COMBINE_OWL_IMPORTS_ON_START")
|
||||||
|
combine_entry_location: str | None = Field(default=None, alias="COMBINE_ENTRY_LOCATION")
|
||||||
|
combine_output_location: str | None = Field(default=None, alias="COMBINE_OUTPUT_LOCATION")
|
||||||
|
combine_output_name: str = Field(default="combined_ontology.ttl", alias="COMBINE_OUTPUT_NAME")
|
||||||
|
combine_force: bool = Field(default=False, alias="COMBINE_FORCE")
|
||||||
|
|
||||||
# AnzoGraph / SPARQL endpoint configuration
|
# AnzoGraph / SPARQL endpoint configuration
|
||||||
sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST")
|
sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST")
|
||||||
# If not set, the backend uses `${SPARQL_HOST}/sparql`.
|
# If not set, the backend uses `${SPARQL_HOST}/sparql`.
|
||||||
|
|||||||
@@ -24,11 +24,13 @@ class SparqlEngine(Protocol):
|
|||||||
class RdflibEngine:
|
class RdflibEngine:
|
||||||
name = "rdflib"
|
name = "rdflib"
|
||||||
|
|
||||||
def __init__(self, *, ttl_path: str):
|
def __init__(self, *, ttl_path: str, graph: Graph | None = None):
|
||||||
self.ttl_path = ttl_path
|
self.ttl_path = ttl_path
|
||||||
self.graph: Graph | None = None
|
self.graph: Graph | None = graph
|
||||||
|
|
||||||
async def startup(self) -> None:
|
async def startup(self) -> None:
|
||||||
|
if self.graph is not None:
|
||||||
|
return
|
||||||
g = Graph()
|
g = Graph()
|
||||||
g.parse(self.ttl_path, format="turtle")
|
g.parse(self.ttl_path, format="turtle")
|
||||||
self.graph = g
|
self.graph = g
|
||||||
@@ -167,9 +169,9 @@ class AnzoGraphEngine:
|
|||||||
raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err
|
raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err
|
||||||
|
|
||||||
|
|
||||||
def create_sparql_engine(settings: Settings) -> SparqlEngine:
|
def create_sparql_engine(settings: Settings, *, rdflib_graph: Graph | None = None) -> SparqlEngine:
|
||||||
if settings.graph_backend == "rdflib":
|
if settings.graph_backend == "rdflib":
|
||||||
return RdflibEngine(ttl_path=settings.ttl_path)
|
return RdflibEngine(ttl_path=settings.ttl_path, graph=rdflib_graph)
|
||||||
if settings.graph_backend == "anzograph":
|
if settings.graph_backend == "anzograph":
|
||||||
return AnzoGraphEngine(settings=settings)
|
return AnzoGraphEngine(settings=settings)
|
||||||
raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}")
|
raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}")
|
||||||
|
|||||||
@@ -21,9 +21,14 @@ services:
|
|||||||
- SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30}
|
- SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30}
|
||||||
- SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4}
|
- SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4}
|
||||||
- SPARQL_READY_TIMEOUT_S=${SPARQL_READY_TIMEOUT_S:-10}
|
- SPARQL_READY_TIMEOUT_S=${SPARQL_READY_TIMEOUT_S:-10}
|
||||||
|
- COMBINE_OWL_IMPORTS_ON_START=${COMBINE_OWL_IMPORTS_ON_START:-false}
|
||||||
|
- COMBINE_ENTRY_LOCATION
|
||||||
|
- COMBINE_OUTPUT_LOCATION
|
||||||
|
- COMBINE_OUTPUT_NAME
|
||||||
|
- COMBINE_FORCE=${COMBINE_FORCE:-false}
|
||||||
volumes:
|
volumes:
|
||||||
- ./backend:/app
|
- ./backend:/app
|
||||||
- ./data:/data:ro
|
- ./data:/data:Z
|
||||||
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health').read()"]
|
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health').read()"]
|
||||||
|
|||||||
@@ -1,371 +0,0 @@
|
|||||||
# Waiting for AnzoGraph readiness from Julia (how this repo does it)
|
|
||||||
|
|
||||||
This repo runs a Julia pipeline (`julia/main.jl`) against an AnzoGraph SPARQL endpoint. The key problem is that **“container started” ≠ “SPARQL endpoint is ready to accept queries”**.
|
|
||||||
|
|
||||||
So, before the Julia code does anything that depends on SPARQL (like `LOAD <...>` or large `SELECT`s), it explicitly **waits until AnzoGraph is actually responding to a real SPARQL POST request with valid JSON results**.
|
|
||||||
|
|
||||||
This document explains the exact mechanism used here, why it works, and gives copy/paste-ready patterns you can transfer to another project.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1) Where the waiting happens (pipeline control flow)
|
|
||||||
|
|
||||||
In `julia/main.jl`, the entrypoint calls:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
# Step 1: Wait for AnzoGraph
|
|
||||||
wait_for_anzograph()
|
|
||||||
|
|
||||||
# Step 2: Load TTL file
|
|
||||||
result = sparql_update("LOAD <$SPARQL_DATA_FILE>")
|
|
||||||
```
|
|
||||||
|
|
||||||
So the “await” is not a Julia `Task`/`async` wait; it is a **blocking retry loop** that only returns when it can successfully execute a small SPARQL query.
|
|
||||||
|
|
||||||
Reference: `julia/main.jl` defines `wait_for_anzograph()` and calls it from `main()`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2) Why this is needed even with Docker Compose `depends_on`
|
|
||||||
|
|
||||||
This repo’s `docker-compose.yml` includes an AnzoGraph `healthcheck`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
anzograph:
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "curl -f http://localhost:8080/sparql || exit 1"]
|
|
||||||
interval: 10s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 30
|
|
||||||
start_period: 60s
|
|
||||||
```
|
|
||||||
|
|
||||||
However, `julia-layout` currently depends on `anzograph` with:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
depends_on:
|
|
||||||
anzograph:
|
|
||||||
condition: service_started
|
|
||||||
```
|
|
||||||
|
|
||||||
Meaning:
|
|
||||||
- Compose will ensure the **container process has started**.
|
|
||||||
- Compose does **not** guarantee the AnzoGraph HTTP/SPARQL endpoint is ready (unless you use `service_healthy`, and even then a “healthy GET” is not always equivalent to “SPARQL POST works with auth + JSON”).
|
|
||||||
|
|
||||||
So the Julia code includes its own readiness gate to prevent failures like:
|
|
||||||
- TCP connection refused (port not open yet)
|
|
||||||
- HTTP endpoint reachable but not fully initialized
|
|
||||||
- Non-JSON/HTML error responses while the service is still booting
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3) What “ready” means in this repo
|
|
||||||
|
|
||||||
In this repo, “AnzoGraph is ready” means:
|
|
||||||
|
|
||||||
1. An HTTP `POST` to `${SPARQL_HOST}/sparql` succeeds, with headers:
|
|
||||||
- `Content-Type: application/x-www-form-urlencoded`
|
|
||||||
- `Accept: application/sparql-results+json`
|
|
||||||
- `Authorization: Basic ...`
|
|
||||||
2. The body parses as SPARQL JSON results (`application/sparql-results+json`)
|
|
||||||
|
|
||||||
It does **not** strictly mean:
|
|
||||||
- Your dataset is already loaded
|
|
||||||
- The loaded data is fully indexed (that can matter in some systems after `LOAD`)
|
|
||||||
|
|
||||||
This repo uses readiness as a **“SPARQL endpoint is alive and speaking the protocol”** check.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4) The actual Julia implementation (as in `julia/main.jl`)
|
|
||||||
|
|
||||||
### 4.1 Configuration (endpoint + auth)
|
|
||||||
|
|
||||||
The Julia script builds endpoint and auth from environment variables:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
const SPARQL_HOST = get(ENV, "SPARQL_HOST", "http://localhost:8080")
|
|
||||||
const SPARQL_ENDPOINT = "$SPARQL_HOST/sparql"
|
|
||||||
const SPARQL_USER = get(ENV, "SPARQL_USER", "admin")
|
|
||||||
const SPARQL_PASS = get(ENV, "SPARQL_PASS", "Passw0rd1")
|
|
||||||
const AUTH_HEADER = "Basic " * base64encode("$SPARQL_USER:$SPARQL_PASS")
|
|
||||||
```
|
|
||||||
|
|
||||||
In Docker Compose for this repo, the Julia container overrides `SPARQL_HOST` to use the service DNS name:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
environment:
|
|
||||||
- SPARQL_HOST=http://anzograph:8080
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4.2 The smoke query used for readiness
|
|
||||||
|
|
||||||
This is the query used in the wait loop:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
const SMOKE_TEST_QUERY = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 3"
|
|
||||||
```
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
- It’s intentionally small (`LIMIT 3`) to keep the readiness check cheap.
|
|
||||||
- It returns *some* bindings when data exists, but **even an empty dataset can still return a valid empty result set**. The code treats “valid response” as ready.
|
|
||||||
|
|
||||||
If you want a readiness check that does not depend on any data being present, an `ASK` query is also common:
|
|
||||||
|
|
||||||
```sparql
|
|
||||||
ASK WHERE { ?s ?p ?o }
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4.3 SPARQL query function (request + minimal retry)
|
|
||||||
|
|
||||||
`sparql_query(query; retries=...)` is a generic helper that makes SPARQL POST requests:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
function sparql_query(query::String; retries::Int=5)::SparqlResult
|
|
||||||
for attempt in 1:retries
|
|
||||||
try
|
|
||||||
response = HTTP.post(
|
|
||||||
SPARQL_ENDPOINT,
|
|
||||||
[
|
|
||||||
"Content-Type" => "application/x-www-form-urlencoded",
|
|
||||||
"Accept" => "application/sparql-results+json",
|
|
||||||
"Authorization" => AUTH_HEADER
|
|
||||||
];
|
|
||||||
body = "query=" * HTTP.URIs.escapeuri(query)
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status == 200
|
|
||||||
json = JSON.parse(String(response.body))
|
|
||||||
return SparqlResult(json["results"]["bindings"])
|
|
||||||
elseif response.status >= 500 && attempt < retries
|
|
||||||
sleep(10)
|
|
||||||
continue
|
|
||||||
else
|
|
||||||
error("SPARQL query failed with status $(response.status)")
|
|
||||||
end
|
|
||||||
catch e
|
|
||||||
if attempt < retries
|
|
||||||
sleep(10)
|
|
||||||
continue
|
|
||||||
end
|
|
||||||
rethrow(e)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
error("SPARQL query failed after $retries attempts")
|
|
||||||
end
|
|
||||||
```
|
|
||||||
|
|
||||||
Important behaviors to preserve when transferring:
|
|
||||||
- It uses **POST** (not GET) to the SPARQL endpoint.
|
|
||||||
- It requires a **200** response and successfully parses SPARQL JSON results.
|
|
||||||
- It retries on:
|
|
||||||
- `>= 500` server errors
|
|
||||||
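- network / protocol / parsing errors (caught exceptions); note that the `error(...)` raised for any other non-200 status is thrown inside the `try`, so it is caught and retried as well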
|
|
||||||
|
|
||||||
### 4.4 The readiness gate: `wait_for_anzograph`
|
|
||||||
|
|
||||||
This is the “await until ready” logic:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
function wait_for_anzograph(max_retries::Int=30)::Bool
|
|
||||||
println("Waiting for AnzoGraph at $SPARQL_ENDPOINT...")
|
|
||||||
|
|
||||||
for attempt in 1:max_retries
|
|
||||||
try
|
|
||||||
smoke_result = sparql_query(SMOKE_TEST_QUERY; retries=1)
|
|
||||||
println(" AnzoGraph is ready (attempt $attempt, smoke rows=$(length(smoke_result.bindings)))")
|
|
||||||
return true
|
|
||||||
catch e
|
|
||||||
println(" Attempt $attempt/$max_retries: $(typeof(e))")
|
|
||||||
sleep(4)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
error("AnzoGraph not available after $max_retries attempts")
|
|
||||||
end
|
|
||||||
```
|
|
||||||
|
|
||||||
Why it calls `sparql_query(...; retries=1)`:
|
|
||||||
- It makes each outer “readiness attempt” a **single** request.
|
|
||||||
- The outer loop controls cadence (`sleep(4)`) and total wait time.
|
|
||||||
- This avoids “nested retry loops” (inner sleeps + outer sleeps) that can make waits much longer than intended.
|
|
||||||
|
|
||||||
Time bound in the current implementation:
|
|
||||||
- `max_retries = 30`
|
|
||||||
- `sleep(4)` between attempts
|
|
||||||
- About 120 seconds of waiting in total (30 attempts × 4 s), plus request time.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 5) What failures cause it to keep waiting
|
|
||||||
|
|
||||||
`wait_for_anzograph()` catches any exception thrown by `sparql_query()` and retries. In practice, that includes:
|
|
||||||
|
|
||||||
- **Connection errors** (DNS not ready, connection refused, etc.)
|
|
||||||
- **Timeouts** (if the HTTP request takes too long and the library throws)
|
|
||||||
- **Non-200 HTTP statuses** that cause `error(...)`
|
|
||||||
- **Non-JSON / unexpected JSON** responses causing `JSON.parse(...)` to throw
|
|
||||||
|
|
||||||
That last point is a big reason a “real SPARQL request + parse” is stronger than just “ping the port”.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 6) Transferable, self-contained version (recommended pattern)
|
|
||||||
|
|
||||||
If you want to reuse this in another project, it’s usually easier to:
|
|
||||||
- avoid globals,
|
|
||||||
- make endpoint/auth explicit,
|
|
||||||
- use a **time-based timeout** instead of `max_retries` (more robust),
|
|
||||||
- add request timeouts so the wait loop can’t hang forever on a single request.
|
|
||||||
|
|
||||||
Below is a drop-in module you can copy into your project.
|
|
||||||
|
|
||||||
```julia
|
|
||||||
module AnzoGraphReady
|
|
||||||
|
|
||||||
using HTTP
|
|
||||||
using JSON
|
|
||||||
using Base64
|
|
||||||
using Dates
|
|
||||||
|
|
||||||
struct SparqlResult
|
|
||||||
bindings::Vector{Dict{String, Any}}
|
|
||||||
end
|
|
||||||
|
|
||||||
function basic_auth_header(user::AbstractString, pass::AbstractString)::String
|
|
||||||
return "Basic " * base64encode("$(user):$(pass)")
|
|
||||||
end
|
|
||||||
|
|
||||||
function sparql_query(
|
|
||||||
endpoint::AbstractString,
|
|
||||||
auth_header::AbstractString,
|
|
||||||
query::AbstractString;
|
|
||||||
retries::Int = 1,
|
|
||||||
retry_sleep_s::Real = 2,
|
|
||||||
request_timeout_s::Real = 15,
|
|
||||||
)::SparqlResult
|
|
||||||
for attempt in 1:retries
|
|
||||||
try
|
|
||||||
response = HTTP.post(
|
|
||||||
String(endpoint),
|
|
||||||
[
|
|
||||||
"Content-Type" => "application/x-www-form-urlencoded",
|
|
||||||
"Accept" => "application/sparql-results+json",
|
|
||||||
"Authorization" => auth_header,
|
|
||||||
];
|
|
||||||
body = "query=" * HTTP.URIs.escapeuri(String(query)),
|
|
||||||
readtimeout = request_timeout_s,
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status != 200
|
|
||||||
error("SPARQL query failed with status $(response.status)")
|
|
||||||
end
|
|
||||||
|
|
||||||
parsed = JSON.parse(String(response.body))
|
|
||||||
bindings = get(get(parsed, "results", Dict()), "bindings", Any[])
|
|
||||||
return SparqlResult(Vector{Dict{String, Any}}(bindings))
|
|
||||||
catch e
|
|
||||||
if attempt < retries
|
|
||||||
sleep(retry_sleep_s)
|
|
||||||
continue
|
|
||||||
end
|
|
||||||
rethrow(e)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
error("sparql_query: unreachable")
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
Wait until AnzoGraph responds to a real SPARQL POST with parseable JSON.
|
|
||||||
|
|
||||||
This is the direct analog of this repo's `wait_for_anzograph()`, but with:
|
|
||||||
- a time-based timeout (`timeout`)
|
|
||||||
- a request timeout per attempt (`request_timeout_s`)
|
|
||||||
- simple exponential backoff
|
|
||||||
"""
|
|
||||||
function wait_for_anzograph(
|
|
||||||
endpoint::AbstractString,
|
|
||||||
auth_header::AbstractString;
|
|
||||||
timeout::Period = Minute(3),
|
|
||||||
initial_delay_s::Real = 0.5,
|
|
||||||
max_delay_s::Real = 5.0,
|
|
||||||
request_timeout_s::Real = 10.0,
|
|
||||||
query::AbstractString = "ASK WHERE { ?s ?p ?o }",
|
|
||||||
)::Nothing
|
|
||||||
deadline = now() + timeout
|
|
||||||
delay_s = initial_delay_s
|
|
||||||
|
|
||||||
while now() < deadline
|
|
||||||
try
|
|
||||||
# A single attempt: if it succeeds, we declare "ready".
|
|
||||||
sparql_query(
|
|
||||||
endpoint,
|
|
||||||
auth_header,
|
|
||||||
query;
|
|
||||||
retries = 1,
|
|
||||||
request_timeout_s = request_timeout_s,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
catch
|
|
||||||
sleep(delay_s)
|
|
||||||
delay_s = min(max_delay_s, delay_s * 1.5)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
error("AnzoGraph not available before timeout=$(timeout)")
|
|
||||||
end
|
|
||||||
|
|
||||||
end # module
|
|
||||||
```
|
|
||||||
|
|
||||||
Typical usage (matching this repo’s environment variables):
|
|
||||||
|
|
||||||
```julia
|
|
||||||
using .AnzoGraphReady
|
|
||||||
|
|
||||||
sparql_host = get(ENV, "SPARQL_HOST", "http://localhost:8080")
|
|
||||||
endpoint = "$(sparql_host)/sparql"
|
|
||||||
user = get(ENV, "SPARQL_USER", "admin")
|
|
||||||
pass = get(ENV, "SPARQL_PASS", "Passw0rd1")
|
|
||||||
|
|
||||||
auth = AnzoGraphReady.basic_auth_header(user, pass)
|
|
||||||
AnzoGraphReady.wait_for_anzograph(endpoint, auth; timeout=Minute(5))
|
|
||||||
|
|
||||||
# Now it is safe to LOAD / query.
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 7) Optional: waiting for “data is ready” after `LOAD`
|
|
||||||
|
|
||||||
Some systems accept `LOAD` but need time before results show up reliably (indexing / transaction visibility).
|
|
||||||
If you run into that in your other project, add a second gate after `LOAD`, for example:
|
|
||||||
|
|
||||||
1) load, then
|
|
||||||
2) poll a query that must be true after load (e.g., “triple count > 0”, or a known IRI exists).
|
|
||||||
|
|
||||||
Example “post-load gate”:
|
|
||||||
|
|
||||||
```julia
|
|
||||||
post_load_query = """
|
|
||||||
SELECT (COUNT(*) AS ?n)
|
|
||||||
WHERE { ?s ?p ?o }
|
|
||||||
"""
|
|
||||||
|
|
||||||
res = AnzoGraphReady.sparql_query(endpoint, auth, post_load_query; retries=1)
|
|
||||||
# Parse `?n` out of bindings and require it to be > 0; retry until it is.
|
|
||||||
```
|
|
||||||
|
|
||||||
(This repo does not currently enforce “non-empty”; it only enforces “SPARQL is working”.)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 8) Practical checklist when transferring to another project
|
|
||||||
|
|
||||||
- Make readiness checks hit the **real SPARQL POST** path you will use in production.
|
|
||||||
- Require a **valid JSON parse**, not just “port open”.
|
|
||||||
- Add **per-request timeouts**, so a single hung request cannot hang the whole pipeline.
|
|
||||||
- Prefer **time-based overall timeout** for predictable behavior in CI.
|
|
||||||
- Keep the query **cheap** (`ASK` or `LIMIT 1/3`).
|
|
||||||
- If you use Docker Compose healthchecks, consider also using `depends_on: condition: service_healthy`, but still keep the in-app wait as a safety net (it’s closer to the real contract your code needs).
|
|
||||||
|
|
||||||
@@ -5,6 +5,17 @@ function sleep(ms: number): Promise<void> {
|
|||||||
return new Promise((r) => setTimeout(r, ms));
|
return new Promise((r) => setTimeout(r, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Optional metadata accompanying a graph payload. Every field is optional,
 * so consumers must check each value before using it.
 */
type GraphMeta = {
  // Data-source description (presumably mirrors the backend configuration; verify against the API).
  backend?: string;
  ttl_path?: string | null;
  sparql_endpoint?: string | null;
  include_bnodes?: boolean;

  // Limits and counts reported for the payload.
  node_limit?: number;
  edge_limit?: number;
  nodes?: number;
  edges?: number;
};
|
||||||
|
|
||||||
export default function App() {
|
export default function App() {
|
||||||
const canvasRef = useRef<HTMLCanvasElement>(null);
|
const canvasRef = useRef<HTMLCanvasElement>(null);
|
||||||
const rendererRef = useRef<Renderer | null>(null);
|
const rendererRef = useRef<Renderer | null>(null);
|
||||||
@@ -18,12 +29,15 @@ export default function App() {
|
|||||||
ptSize: 0,
|
ptSize: 0,
|
||||||
});
|
});
|
||||||
const [error, setError] = useState("");
|
const [error, setError] = useState("");
|
||||||
const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number } | null>(null);
|
const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number; label?: string; iri?: string } | null>(null);
|
||||||
const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set());
|
const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set());
|
||||||
const [backendStats, setBackendStats] = useState<{ nodes: number; edges: number; backend?: string } | null>(null);
|
const [backendStats, setBackendStats] = useState<{ nodes: number; edges: number; backend?: string } | null>(null);
|
||||||
|
const graphMetaRef = useRef<GraphMeta | null>(null);
|
||||||
|
const neighborsReqIdRef = useRef(0);
|
||||||
|
|
||||||
// Store mouse position in a ref so it can be accessed in render loop without re-renders
|
// Store mouse position in a ref so it can be accessed in render loop without re-renders
|
||||||
const mousePos = useRef({ x: 0, y: 0 });
|
const mousePos = useRef({ x: 0, y: 0 });
|
||||||
|
const nodesRef = useRef<any[]>([]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const canvas = canvasRef.current;
|
const canvas = canvasRef.current;
|
||||||
@@ -70,6 +84,9 @@ export default function App() {
|
|||||||
const meta = graph.meta || null;
|
const meta = graph.meta || null;
|
||||||
const count = nodes.length;
|
const count = nodes.length;
|
||||||
|
|
||||||
|
nodesRef.current = nodes;
|
||||||
|
graphMetaRef.current = meta && typeof meta === "object" ? (meta as GraphMeta) : null;
|
||||||
|
|
||||||
// Build positions from backend-provided node coordinates.
|
// Build positions from backend-provided node coordinates.
|
||||||
setStatus("Preparing buffers…");
|
setStatus("Preparing buffers…");
|
||||||
const xs = new Float32Array(count);
|
const xs = new Float32Array(count);
|
||||||
@@ -196,9 +213,18 @@ export default function App() {
|
|||||||
frameCount++;
|
frameCount++;
|
||||||
|
|
||||||
// Find hovered node using quadtree
|
// Find hovered node using quadtree
|
||||||
const node = renderer.findNodeAt(mousePos.current.x, mousePos.current.y);
|
const hit = renderer.findNodeIndexAt(mousePos.current.x, mousePos.current.y);
|
||||||
if (node) {
|
if (hit) {
|
||||||
setHoveredNode({ ...node, screenX: mousePos.current.x, screenY: mousePos.current.y });
|
const origIdx = renderer.sortedIndexToOriginalIndex(hit.index);
|
||||||
|
const meta = origIdx === null ? null : nodesRef.current[origIdx];
|
||||||
|
setHoveredNode({
|
||||||
|
x: hit.x,
|
||||||
|
y: hit.y,
|
||||||
|
screenX: mousePos.current.x,
|
||||||
|
screenY: mousePos.current.y,
|
||||||
|
label: meta && typeof meta.label === "string" ? meta.label : undefined,
|
||||||
|
iri: meta && typeof meta.iri === "string" ? meta.iri : undefined,
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
setHoveredNode(null);
|
setHoveredNode(null);
|
||||||
}
|
}
|
||||||
@@ -234,9 +260,72 @@ export default function App() {
|
|||||||
|
|
||||||
// Sync selection state to renderer
|
// Sync selection state to renderer
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (rendererRef.current) {
|
const renderer = rendererRef.current;
|
||||||
rendererRef.current.updateSelection(selectedNodes);
|
if (!renderer) return;
|
||||||
|
|
||||||
|
// Optimistically reflect selection immediately; neighbors will be filled in by backend.
|
||||||
|
renderer.updateSelection(selectedNodes, new Set());
|
||||||
|
|
||||||
|
// Invalidate any in-flight neighbor request for the previous selection.
|
||||||
|
const reqId = ++neighborsReqIdRef.current;
|
||||||
|
|
||||||
|
// Convert selected sorted indices to backend node IDs (graph-export dense IDs).
|
||||||
|
const selectedIds: number[] = [];
|
||||||
|
for (const sortedIdx of selectedNodes) {
|
||||||
|
const origIdx = renderer.sortedIndexToOriginalIndex(sortedIdx);
|
||||||
|
if (origIdx === null) continue;
|
||||||
|
const nodeId = nodesRef.current?.[origIdx]?.id;
|
||||||
|
if (typeof nodeId === "number") selectedIds.push(nodeId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (selectedIds.length === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always send the full current selection list; backend returns the merged neighbor set.
|
||||||
|
const ctrl = new AbortController();
|
||||||
|
|
||||||
|
(async () => {
|
||||||
|
try {
|
||||||
|
const meta = graphMetaRef.current;
|
||||||
|
const body = {
|
||||||
|
selected_ids: selectedIds,
|
||||||
|
node_limit: typeof meta?.node_limit === "number" ? meta.node_limit : undefined,
|
||||||
|
edge_limit: typeof meta?.edge_limit === "number" ? meta.edge_limit : undefined,
|
||||||
|
};
|
||||||
|
|
||||||
|
const res = await fetch("/api/neighbors", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "content-type": "application/json" },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
signal: ctrl.signal,
|
||||||
|
});
|
||||||
|
if (!res.ok) throw new Error(`POST /api/neighbors failed: ${res.status}`);
|
||||||
|
const data = await res.json();
|
||||||
|
if (ctrl.signal.aborted) return;
|
||||||
|
if (reqId !== neighborsReqIdRef.current) return;
|
||||||
|
|
||||||
|
const neighborIds: unknown = data?.neighbor_ids;
|
||||||
|
const neighborSorted = new Set<number>();
|
||||||
|
if (Array.isArray(neighborIds)) {
|
||||||
|
for (const id of neighborIds) {
|
||||||
|
if (typeof id !== "number") continue;
|
||||||
|
const sorted = renderer.vertexIdToSortedIndexOrNull(id);
|
||||||
|
if (sorted === null) continue;
|
||||||
|
if (!selectedNodes.has(sorted)) neighborSorted.add(sorted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
renderer.updateSelection(selectedNodes, neighborSorted);
|
||||||
|
} catch (e) {
|
||||||
|
if (ctrl.signal.aborted) return;
|
||||||
|
console.warn(e);
|
||||||
|
// Keep the UI usable even if neighbors fail to load.
|
||||||
|
renderer.updateSelection(selectedNodes, new Set());
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
|
||||||
|
return () => ctrl.abort();
|
||||||
}, [selectedNodes]);
|
}, [selectedNodes]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -350,7 +439,12 @@ export default function App() {
|
|||||||
boxShadow: "0 2px 8px rgba(0,0,0,0.5)",
|
boxShadow: "0 2px 8px rgba(0,0,0,0.5)",
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)})
|
<div style={{ color: "#0ff" }}>
|
||||||
|
{hoveredNode.label || hoveredNode.iri || "(unknown)"}
|
||||||
|
</div>
|
||||||
|
<div style={{ color: "#688" }}>
|
||||||
|
({hoveredNode.x.toFixed(2)}, {hoveredNode.y.toFixed(2)})
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
|
|||||||
@@ -80,9 +80,11 @@ export class Renderer {
|
|||||||
// Data
|
// Data
|
||||||
private leaves: Leaf[] = [];
|
private leaves: Leaf[] = [];
|
||||||
private sorted: Float32Array = new Float32Array(0);
|
private sorted: Float32Array = new Float32Array(0);
|
||||||
|
// order[sortedIdx] = originalIdx (original ordering matches input arrays)
|
||||||
|
private sortedToOriginal: Uint32Array = new Uint32Array(0);
|
||||||
|
private vertexIdToSortedIndex: Map<number, number> = new Map();
|
||||||
private nodeCount = 0;
|
private nodeCount = 0;
|
||||||
private edgeCount = 0;
|
private edgeCount = 0;
|
||||||
private neighborMap: Map<number, number[]> = new Map();
|
|
||||||
private leafEdgeStarts: Uint32Array = new Uint32Array(0);
|
private leafEdgeStarts: Uint32Array = new Uint32Array(0);
|
||||||
private leafEdgeCounts: Uint32Array = new Uint32Array(0);
|
private leafEdgeCounts: Uint32Array = new Uint32Array(0);
|
||||||
private maxPtSize = 256;
|
private maxPtSize = 256;
|
||||||
@@ -202,6 +204,7 @@ export class Renderer {
|
|||||||
const { sorted, leaves, order } = buildSpatialIndex(xs, ys);
|
const { sorted, leaves, order } = buildSpatialIndex(xs, ys);
|
||||||
this.leaves = leaves;
|
this.leaves = leaves;
|
||||||
this.sorted = sorted;
|
this.sorted = sorted;
|
||||||
|
this.sortedToOriginal = order;
|
||||||
|
|
||||||
// Pre-allocate arrays for render loop (zero-allocation rendering)
|
// Pre-allocate arrays for render loop (zero-allocation rendering)
|
||||||
this.visibleLeafIndices = new Uint32Array(leaves.length);
|
this.visibleLeafIndices = new Uint32Array(leaves.length);
|
||||||
@@ -226,6 +229,13 @@ export class Renderer {
|
|||||||
originalToSorted[order[i]] = i;
|
originalToSorted[order[i]] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build vertex ID → sorted index mapping (used by backend-driven neighbor highlighting)
|
||||||
|
const vertexIdToSortedIndex = new Map<number, number>();
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
vertexIdToSortedIndex.set(vertexIds[i], originalToSorted[i]);
|
||||||
|
}
|
||||||
|
this.vertexIdToSortedIndex = vertexIdToSortedIndex;
|
||||||
|
|
||||||
// Remap edges from vertex IDs to sorted indices
|
// Remap edges from vertex IDs to sorted indices
|
||||||
const lineIndices = new Uint32Array(edgeCount * 2);
|
const lineIndices = new Uint32Array(edgeCount * 2);
|
||||||
let validEdges = 0;
|
let validEdges = 0;
|
||||||
@@ -241,18 +251,6 @@ export class Renderer {
|
|||||||
}
|
}
|
||||||
this.edgeCount = validEdges;
|
this.edgeCount = validEdges;
|
||||||
|
|
||||||
// Build per-node neighbor list from edges for selection queries
|
|
||||||
const neighborMap = new Map<number, number[]>();
|
|
||||||
for (let i = 0; i < validEdges; i++) {
|
|
||||||
const src = lineIndices[i * 2];
|
|
||||||
const dst = lineIndices[i * 2 + 1];
|
|
||||||
if (!neighborMap.has(src)) neighborMap.set(src, []);
|
|
||||||
neighborMap.get(src)!.push(dst);
|
|
||||||
if (!neighborMap.has(dst)) neighborMap.set(dst, []);
|
|
||||||
neighborMap.get(dst)!.push(src);
|
|
||||||
}
|
|
||||||
this.neighborMap = neighborMap;
|
|
||||||
|
|
||||||
// Build per-leaf edge index for efficient visible-only edge drawing
|
// Build per-leaf edge index for efficient visible-only edge drawing
|
||||||
// Find which leaf each sorted index belongs to
|
// Find which leaf each sorted index belongs to
|
||||||
const nodeToLeaf = new Uint32Array(count);
|
const nodeToLeaf = new Uint32Array(count);
|
||||||
@@ -331,6 +329,28 @@ export class Renderer {
|
|||||||
return this.nodeCount;
|
return this.nodeCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map a sorted buffer index (what findNodeIndexAt returns) back to the original
|
||||||
|
* index in the input arrays used to initialize the renderer.
|
||||||
|
*/
|
||||||
|
sortedIndexToOriginalIndex(sortedIndex: number): number | null {
|
||||||
|
if (
|
||||||
|
sortedIndex < 0 ||
|
||||||
|
sortedIndex >= this.sortedToOriginal.length
|
||||||
|
) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return this.sortedToOriginal[sortedIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a backend node ID (node.id from /api/graph) to a sorted index used by the renderer.
|
||||||
|
*/
|
||||||
|
vertexIdToSortedIndexOrNull(vertexId: number): number | null {
|
||||||
|
const idx = this.vertexIdToSortedIndex.get(vertexId);
|
||||||
|
return typeof idx === "number" ? idx : null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert screen coordinates (CSS pixels) to world coordinates.
|
* Convert screen coordinates (CSS pixels) to world coordinates.
|
||||||
*/
|
*/
|
||||||
@@ -412,10 +432,10 @@ export class Renderer {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the selection buffer with the given set of node indices.
|
* Update the selection buffer with the given set of node indices.
|
||||||
* Also computes neighbors of selected nodes.
|
* Neighbor indices are provided by the backend (SPARQL query) and uploaded separately.
|
||||||
* Call this whenever React's selection state changes.
|
* Call this whenever selection or backend neighbor results change.
|
||||||
*/
|
*/
|
||||||
updateSelection(selectedIndices: Set<number>): void {
|
updateSelection(selectedIndices: Set<number>, neighborIndices: Set<number> = new Set()): void {
|
||||||
const gl = this.gl;
|
const gl = this.gl;
|
||||||
|
|
||||||
// Upload selected indices
|
// Upload selected indices
|
||||||
@@ -425,23 +445,11 @@ export class Renderer {
|
|||||||
gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, indices, gl.DYNAMIC_DRAW);
|
gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, indices, gl.DYNAMIC_DRAW);
|
||||||
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);
|
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);
|
||||||
|
|
||||||
// Compute neighbors of selected nodes (excluding already selected)
|
|
||||||
const neighborSet = new Set<number>();
|
|
||||||
for (const nodeIdx of selectedIndices) {
|
|
||||||
const nodeNeighbors = this.neighborMap.get(nodeIdx);
|
|
||||||
if (!nodeNeighbors) continue;
|
|
||||||
for (const n of nodeNeighbors) {
|
|
||||||
if (!selectedIndices.has(n)) {
|
|
||||||
neighborSet.add(n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Upload neighbor indices
|
// Upload neighbor indices
|
||||||
const neighborIndices = new Uint32Array(neighborSet);
|
const neighborIndexArray = new Uint32Array(neighborIndices);
|
||||||
this.neighborCount = neighborIndices.length;
|
this.neighborCount = neighborIndexArray.length;
|
||||||
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, this.neighborIbo);
|
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, this.neighborIbo);
|
||||||
gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndices, gl.DYNAMIC_DRAW);
|
gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, neighborIndexArray, gl.DYNAMIC_DRAW);
|
||||||
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);
|
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user