backend
This commit is contained in:
30
.env.example
Normal file
30
.env.example
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Choose which engine executes SPARQL:
|
||||||
|
# - rdflib: parse TTL locally and query in-memory
|
||||||
|
# - anzograph: query AnzoGraph over HTTP (optionally LOAD the TTL on startup)
|
||||||
|
GRAPH_BACKEND=rdflib
|
||||||
|
|
||||||
|
# Backend (rdflib) file location inside the container.
|
||||||
|
# The TTL file must exist within the mounted ./data folder if you keep the default volume mount.
|
||||||
|
TTL_PATH=/data/o3po.ttl
|
||||||
|
|
||||||
|
# Backend behavior
|
||||||
|
INCLUDE_BNODES=false
|
||||||
|
# MAX_TRIPLES=1000000
|
||||||
|
|
||||||
|
# AnzoGraph / SPARQL endpoint settings (used when GRAPH_BACKEND=anzograph)
|
||||||
|
SPARQL_HOST=http://anzograph:8080
|
||||||
|
# SPARQL_ENDPOINT=http://anzograph:8080/sparql
|
||||||
|
SPARQL_USER=admin
|
||||||
|
SPARQL_PASS=Passw0rd1
|
||||||
|
|
||||||
|
# File URI as seen by the AnzoGraph container (used by SPARQL `LOAD`)
|
||||||
|
SPARQL_DATA_FILE=file:///opt/shared-files/o3po.ttl
|
||||||
|
# SPARQL_GRAPH_IRI=http://example.org/graph
|
||||||
|
|
||||||
|
# Startup behavior for AnzoGraph mode
|
||||||
|
SPARQL_LOAD_ON_START=false
|
||||||
|
SPARQL_CLEAR_ON_START=false
|
||||||
|
|
||||||
|
# Dev UX
|
||||||
|
CORS_ORIGINS=http://localhost:5173
|
||||||
|
VITE_BACKEND_URL=http://backend:8000
|
||||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1,2 +1,9 @@
|
|||||||
.direnv/
|
.direnv/
|
||||||
.envrc
|
.envrc
|
||||||
|
.env
|
||||||
|
backend/.env
|
||||||
|
frontend/node_modules/
|
||||||
|
frontend/dist/
|
||||||
|
.npm/
|
||||||
|
.vite/
|
||||||
|
data/
|
||||||
|
|||||||
16
backend/Dockerfile
Normal file
16
backend/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
COPY requirements.txt /app/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r /app/requirements.txt
|
||||||
|
|
||||||
|
COPY app /app/app
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
||||||
1
backend/app/__init__.py
Normal file
1
backend/app/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
247
backend/app/main.py
Normal file
247
backend/app/main.py
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException, Query
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
|
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
||||||
|
from .rdf_store import RDFStore
|
||||||
|
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
|
||||||
|
from .settings import Settings
|
||||||
|
|
||||||
|
|
||||||
|
settings = Settings()
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: boot the SPARQL engine, and in rdflib mode
    materialize the in-memory node/edge tables before serving requests."""
    engine: SparqlEngine = create_sparql_engine(settings)
    await engine.startup()
    app.state.sparql = engine

    # Node/edge tables are only built for the local rdflib backend.
    if settings.graph_backend == "rdflib":
        assert isinstance(engine, RdflibEngine)
        if engine.graph is None:
            raise RuntimeError("rdflib graph failed to load")

        store = RDFStore(
            ttl_path=settings.ttl_path,
            include_bnodes=settings.include_bnodes,
            max_triples=settings.max_triples,
        )
        store.load(engine.graph)
        app.state.store = store

    yield

    await engine.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(title="visualizador_instanciados backend", lifespan=lifespan)

# CORS: origins come from the CORS_ORIGINS setting ("*" or a comma-separated list).
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origin_list(),
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/health")
|
||||||
|
def health() -> dict[str, str]:
|
||||||
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/stats", response_model=StatsResponse)
|
||||||
|
async def stats() -> StatsResponse:
|
||||||
|
sparql: SparqlEngine = app.state.sparql
|
||||||
|
|
||||||
|
if settings.graph_backend == "rdflib":
|
||||||
|
store: RDFStore = app.state.store
|
||||||
|
return StatsResponse(
|
||||||
|
backend=sparql.name,
|
||||||
|
ttl_path=settings.ttl_path,
|
||||||
|
sparql_endpoint=None,
|
||||||
|
parsed_triples=store.parsed_triples,
|
||||||
|
nodes=store.node_count,
|
||||||
|
edges=store.edge_count,
|
||||||
|
)
|
||||||
|
|
||||||
|
# AnzoGraph: compute basic counts via SPARQL.
|
||||||
|
assert isinstance(sparql, AnzoGraphEngine)
|
||||||
|
|
||||||
|
def _count_from(result: dict, *, var: str = "count") -> int:
|
||||||
|
bindings = (((result.get("results") or {}).get("bindings")) or [])
|
||||||
|
if not bindings:
|
||||||
|
return 0
|
||||||
|
raw = bindings[0].get(var, {}).get("value")
|
||||||
|
try:
|
||||||
|
return int(raw)
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?n))"
|
||||||
|
nodes_q = f"""
|
||||||
|
SELECT (COUNT(DISTINCT ?n) AS ?count)
|
||||||
|
WHERE {{
|
||||||
|
{{ ?n ?p ?o }} UNION {{ ?s ?p ?n }}
|
||||||
|
FILTER(!isLiteral(?n))
|
||||||
|
{bnode_filter}
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
triples_q = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"
|
||||||
|
|
||||||
|
# Approximate "edges" similarly to our rdflib export: non-literal object, and skip label predicates.
|
||||||
|
edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
||||||
|
edges_q = f"""
|
||||||
|
SELECT (COUNT(*) AS ?count)
|
||||||
|
WHERE {{
|
||||||
|
?s ?p ?o .
|
||||||
|
FILTER(!isLiteral(?o))
|
||||||
|
FILTER(?p NOT IN (
|
||||||
|
<http://www.w3.org/2000/01/rdf-schema#label>,
|
||||||
|
<http://www.w3.org/2004/02/skos/core#prefLabel>,
|
||||||
|
<http://www.w3.org/2004/02/skos/core#altLabel>
|
||||||
|
))
|
||||||
|
{edges_bnode_filter}
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
triples_res = await sparql.query_json(triples_q)
|
||||||
|
nodes_res = await sparql.query_json(nodes_q)
|
||||||
|
edges_res = await sparql.query_json(edges_q)
|
||||||
|
|
||||||
|
return StatsResponse(
|
||||||
|
backend=sparql.name,
|
||||||
|
ttl_path=settings.ttl_path,
|
||||||
|
sparql_endpoint=settings.effective_sparql_endpoint(),
|
||||||
|
parsed_triples=_count_from(triples_res),
|
||||||
|
nodes=_count_from(nodes_res),
|
||||||
|
edges=_count_from(edges_res),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/sparql")
|
||||||
|
async def sparql_query(req: SparqlQueryRequest) -> dict:
|
||||||
|
sparql: SparqlEngine = app.state.sparql
|
||||||
|
data = await sparql.query_json(req.query)
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/nodes", response_model=NodesResponse)
|
||||||
|
def nodes(
|
||||||
|
limit: int = Query(default=10_000, ge=1, le=200_000),
|
||||||
|
offset: int = Query(default=0, ge=0),
|
||||||
|
) -> NodesResponse:
|
||||||
|
if settings.graph_backend != "rdflib":
|
||||||
|
raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode")
|
||||||
|
store: RDFStore = app.state.store
|
||||||
|
return NodesResponse(total=store.node_count, nodes=store.node_slice(offset=offset, limit=limit))
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/edges", response_model=EdgesResponse)
|
||||||
|
def edges(
|
||||||
|
limit: int = Query(default=50_000, ge=1, le=500_000),
|
||||||
|
offset: int = Query(default=0, ge=0),
|
||||||
|
) -> EdgesResponse:
|
||||||
|
if settings.graph_backend != "rdflib":
|
||||||
|
raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode")
|
||||||
|
store: RDFStore = app.state.store
|
||||||
|
return EdgesResponse(total=store.edge_count, edges=store.edge_slice(offset=offset, limit=limit))
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/graph", response_model=GraphResponse)
|
||||||
|
async def graph(
|
||||||
|
node_limit: int = Query(default=50_000, ge=1, le=200_000),
|
||||||
|
edge_limit: int = Query(default=100_000, ge=1, le=500_000),
|
||||||
|
) -> GraphResponse:
|
||||||
|
sparql: SparqlEngine = app.state.sparql
|
||||||
|
|
||||||
|
if settings.graph_backend == "rdflib":
|
||||||
|
store: RDFStore = app.state.store
|
||||||
|
return GraphResponse(
|
||||||
|
nodes=store.node_slice(offset=0, limit=node_limit),
|
||||||
|
edges=store.edge_slice(offset=0, limit=edge_limit),
|
||||||
|
)
|
||||||
|
|
||||||
|
# AnzoGraph mode: return a simple subgraph by pulling the first N triples.
|
||||||
|
assert isinstance(sparql, AnzoGraphEngine)
|
||||||
|
|
||||||
|
edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
||||||
|
edges_q = f"""
|
||||||
|
SELECT ?s ?p ?o
|
||||||
|
WHERE {{
|
||||||
|
?s ?p ?o .
|
||||||
|
FILTER(!isLiteral(?o))
|
||||||
|
FILTER(?p NOT IN (
|
||||||
|
<http://www.w3.org/2000/01/rdf-schema#label>,
|
||||||
|
<http://www.w3.org/2004/02/skos/core#prefLabel>,
|
||||||
|
<http://www.w3.org/2004/02/skos/core#altLabel>
|
||||||
|
))
|
||||||
|
{edges_bnode_filter}
|
||||||
|
}}
|
||||||
|
LIMIT {edge_limit}
|
||||||
|
"""
|
||||||
|
|
||||||
|
res = await sparql.query_json(edges_q)
|
||||||
|
bindings = (((res.get("results") or {}).get("bindings")) or [])
|
||||||
|
|
||||||
|
node_id_by_key: dict[tuple[str, str], int] = {}
|
||||||
|
node_meta: list[tuple[str, str]] = [] # (termType, iri)
|
||||||
|
out_edges: list[dict[str, object]] = []
|
||||||
|
|
||||||
|
def _term_to_key_and_iri(term: dict[str, str]) -> tuple[tuple[str, str], tuple[str, str]] | None:
|
||||||
|
t = term.get("type")
|
||||||
|
v = term.get("value")
|
||||||
|
if not t or v is None:
|
||||||
|
return None
|
||||||
|
if t == "literal":
|
||||||
|
return None
|
||||||
|
if t == "bnode" and not settings.include_bnodes:
|
||||||
|
return None
|
||||||
|
if t == "bnode":
|
||||||
|
return (("bnode", v), ("bnode", f"_:{v}"))
|
||||||
|
# Default to "uri".
|
||||||
|
return (("uri", v), ("uri", v))
|
||||||
|
|
||||||
|
def _get_or_add(term: dict[str, str]) -> int | None:
|
||||||
|
out = _term_to_key_and_iri(term)
|
||||||
|
if out is None:
|
||||||
|
return None
|
||||||
|
key, meta = out
|
||||||
|
existing = node_id_by_key.get(key)
|
||||||
|
if existing is not None:
|
||||||
|
return existing
|
||||||
|
if len(node_meta) >= node_limit:
|
||||||
|
return None
|
||||||
|
nid = len(node_meta)
|
||||||
|
node_id_by_key[key] = nid
|
||||||
|
node_meta.append(meta)
|
||||||
|
return nid
|
||||||
|
|
||||||
|
for b in bindings:
|
||||||
|
s_term = b.get("s") or {}
|
||||||
|
o_term = b.get("o") or {}
|
||||||
|
p_term = b.get("p") or {}
|
||||||
|
|
||||||
|
sid = _get_or_add(s_term)
|
||||||
|
oid = _get_or_add(o_term)
|
||||||
|
if sid is None or oid is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
pred = p_term.get("value")
|
||||||
|
if not pred:
|
||||||
|
continue
|
||||||
|
|
||||||
|
out_edges.append({"source": sid, "target": oid, "predicate": pred})
|
||||||
|
|
||||||
|
out_nodes = [
|
||||||
|
{"id": i, "termType": term_type, "iri": iri, "label": None}
|
||||||
|
for i, (term_type, iri) in enumerate(node_meta)
|
||||||
|
]
|
||||||
|
|
||||||
|
return GraphResponse(nodes=out_nodes, edges=out_edges)
|
||||||
44
backend/app/models.py
Normal file
44
backend/app/models.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class Node(BaseModel):
    """A graph node: one interned RDF term with a dense integer id."""

    id: int
    termType: str  # "uri" | "bnode"
    iri: str
    label: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class Edge(BaseModel):
    """A directed edge between two node ids, tagged with its predicate IRI."""

    source: int
    target: int
    predicate: str
|
||||||
|
|
||||||
|
|
||||||
|
class StatsResponse(BaseModel):
    """Payload of GET /api/stats: backend identity plus basic counts."""

    backend: str
    ttl_path: str
    sparql_endpoint: str | None = None
    parsed_triples: int
    nodes: int
    edges: int
|
||||||
|
|
||||||
|
|
||||||
|
class NodesResponse(BaseModel):
    """One page of nodes plus the total node count."""

    total: int
    nodes: list[Node]
|
||||||
|
|
||||||
|
|
||||||
|
class EdgesResponse(BaseModel):
    """One page of edges plus the total edge count."""

    total: int
    edges: list[Edge]
|
||||||
|
|
||||||
|
|
||||||
|
class GraphResponse(BaseModel):
    """Node and edge lists for rendering a subgraph."""

    nodes: list[Node]
    edges: list[Edge]
|
||||||
|
|
||||||
|
|
||||||
|
class SparqlQueryRequest(BaseModel):
    """Body of POST /api/sparql: a raw SPARQL query string."""

    query: str
|
||||||
134
backend/app/rdf_store.py
Normal file
134
backend/app/rdf_store.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from rdflib import BNode, Graph, Literal, URIRef
|
||||||
|
from rdflib.namespace import RDFS, SKOS
|
||||||
|
|
||||||
|
|
||||||
|
LABEL_PREDICATES = {RDFS.label, SKOS.prefLabel, SKOS.altLabel}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class EdgeRow:
    """Immutable edge record: source/target node ids plus the predicate IRI."""

    source: int
    target: int
    predicate: str
|
||||||
|
|
||||||
|
|
||||||
|
class RDFStore:
    """In-memory node/edge tables derived from an rdflib Graph.

    URIRef/BNode terms are assigned dense integer ids; edges are
    (source, target, predicate) rows. Label predicates (rdfs:label,
    skos:prefLabel, skos:altLabel) become node labels instead of edges.
    """

    def __init__(self, *, ttl_path: str, include_bnodes: bool, max_triples: int | None):
        # TTL file parsed by load() when no graph is supplied.
        self.ttl_path = ttl_path
        # Whether blank nodes participate in the node/edge tables.
        self.include_bnodes = include_bnodes
        # Optional cap on how many triples load() processes.
        self.max_triples = max_triples

        self.graph: Graph | None = None

        self._id_by_term: dict[Any, int] = {}
        self._term_by_id: list[Any] = []

        self._labels_by_id: dict[int, str] = {}
        self._edges: list[EdgeRow] = []
        self._parsed_triples = 0

    def _term_allowed(self, term: Any) -> bool:
        """True when *term* may become a node: URIRef always, BNode only if enabled."""
        if isinstance(term, Literal):
            return False
        if isinstance(term, BNode) and not self.include_bnodes:
            return False
        return isinstance(term, (URIRef, BNode))

    def _get_id(self, term: Any) -> int | None:
        """Return the node id for *term*, assigning a new dense id on first sight."""
        if not self._term_allowed(term):
            return None
        existing = self._id_by_term.get(term)
        if existing is not None:
            return existing
        nid = len(self._term_by_id)
        self._id_by_term[term] = nid
        self._term_by_id.append(term)
        return nid

    def _term_type(self, term: Any) -> str:
        """'bnode' for blank nodes, 'uri' otherwise."""
        if isinstance(term, BNode):
            return "bnode"
        return "uri"

    def _term_iri(self, term: Any) -> str:
        """Render *term* as a string; blank nodes get the conventional `_:` prefix."""
        if isinstance(term, BNode):
            return f"_:{term}"
        return str(term)

    def load(self, graph: Graph | None = None) -> None:
        """(Re)build the node/edge tables from *graph*, or parse ttl_path.

        Fix: the previous `graph or Graph()` treated an *empty* caller-supplied
        graph as missing, because an rdflib Graph with no triples is falsy;
        an explicit `is None` check keeps the caller's graph object.
        """
        g = graph if graph is not None else Graph()
        if graph is None:
            g.parse(self.ttl_path, format="turtle")
        self.graph = g

        self._id_by_term.clear()
        self._term_by_id.clear()
        self._labels_by_id.clear()
        self._edges.clear()

        processed = 0
        for (s, p, o) in g:
            # Fix: test the cap *before* counting so parsed_triples reports
            # exactly how many triples were processed (the old code counted
            # the triple it then skipped, overstating by one on truncation).
            if self.max_triples is not None and processed >= self.max_triples:
                break
            processed += 1

            # Capture labels but do not emit them as edges.
            if p in LABEL_PREDICATES and isinstance(o, Literal):
                sid = self._get_id(s)
                if sid is not None and sid not in self._labels_by_id:
                    self._labels_by_id[sid] = str(o)
                continue

            sid = self._get_id(s)
            oid = self._get_id(o)
            if sid is None or oid is None:
                continue

            self._edges.append(EdgeRow(source=sid, target=oid, predicate=str(p)))

        self._parsed_triples = processed

    @property
    def parsed_triples(self) -> int:
        """Number of triples processed by the last load()."""
        return self._parsed_triples

    @property
    def node_count(self) -> int:
        """Total number of interned nodes."""
        return len(self._term_by_id)

    @property
    def edge_count(self) -> int:
        """Total number of emitted edges."""
        return len(self._edges)

    def node_slice(self, *, offset: int, limit: int) -> list[dict[str, Any]]:
        """Return node dicts for ids in [offset, offset + limit)."""
        end = min(self.node_count, offset + limit)
        out: list[dict[str, Any]] = []
        for nid in range(offset, end):
            term = self._term_by_id[nid]
            out.append(
                {
                    "id": nid,
                    "termType": self._term_type(term),
                    "iri": self._term_iri(term),
                    "label": self._labels_by_id.get(nid),
                }
            )
        return out

    def edge_slice(self, *, offset: int, limit: int) -> list[dict[str, Any]]:
        """Return edge dicts for rows in [offset, offset + limit)."""
        end = min(self.edge_count, offset + limit)
        return [
            {"source": row.source, "target": row.target, "predicate": row.predicate}
            for row in self._edges[offset:end]
        ]
|
||||||
50
backend/app/settings.py
Normal file
50
backend/app/settings.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from pydantic import Field
|
||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Environment-driven configuration (see .env.example)."""

    # Engine that executes SPARQL queries:
    #   rdflib    - parse the TTL locally and query in-memory
    #   anzograph - query a remote AnzoGraph SPARQL endpoint (optionally LOAD on startup)
    graph_backend: Literal["rdflib", "anzograph"] = Field(default="rdflib", alias="GRAPH_BACKEND")

    ttl_path: str = Field(default="/data/o3po.ttl", alias="TTL_PATH")
    include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES")
    max_triples: int | None = Field(default=None, alias="MAX_TRIPLES")

    # AnzoGraph / SPARQL endpoint configuration.
    sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST")
    # When unset, the backend falls back to `${SPARQL_HOST}/sparql`.
    sparql_endpoint: str | None = Field(default=None, alias="SPARQL_ENDPOINT")
    sparql_user: str | None = Field(default=None, alias="SPARQL_USER")
    sparql_pass: str | None = Field(default=None, alias="SPARQL_PASS")

    # File URI as seen from inside the AnzoGraph container (used with SPARQL `LOAD`),
    # e.g. file:///opt/shared-files/o3po.ttl
    sparql_data_file: str | None = Field(default=None, alias="SPARQL_DATA_FILE")
    sparql_graph_iri: str | None = Field(default=None, alias="SPARQL_GRAPH_IRI")
    sparql_load_on_start: bool = Field(default=False, alias="SPARQL_LOAD_ON_START")
    sparql_clear_on_start: bool = Field(default=False, alias="SPARQL_CLEAR_ON_START")

    sparql_timeout_s: float = Field(default=300.0, alias="SPARQL_TIMEOUT_S")
    sparql_ready_retries: int = Field(default=30, alias="SPARQL_READY_RETRIES")
    sparql_ready_delay_s: float = Field(default=4.0, alias="SPARQL_READY_DELAY_S")

    # Comma-separated origin list, or "*" (default) for any origin.
    cors_origins: str = Field(default="*", alias="CORS_ORIGINS")

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    def cors_origin_list(self) -> list[str]:
        """CORS_ORIGINS parsed into a list; a lone "*" stays as ["*"]."""
        if self.cors_origins.strip() == "*":
            return ["*"]
        return [origin.strip() for origin in self.cors_origins.split(",") if origin.strip()]

    def effective_sparql_endpoint(self) -> str:
        """The explicit SPARQL_ENDPOINT when set, else SPARQL_HOST + "/sparql"."""
        explicit = (self.sparql_endpoint or "").strip()
        if explicit:
            return explicit
        return self.sparql_host.rstrip("/") + "/sparql"
|
||||||
155
backend/app/sparql_engine.py
Normal file
155
backend/app/sparql_engine.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
from typing import Any, Protocol
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from rdflib import Graph
|
||||||
|
|
||||||
|
from .settings import Settings
|
||||||
|
|
||||||
|
|
||||||
|
class SparqlEngine(Protocol):
    """Minimal async interface every SPARQL backend implements."""

    # Human-readable backend identifier (reported by /api/stats).
    name: str

    async def startup(self) -> None: ...

    async def shutdown(self) -> None: ...

    async def query_json(self, query: str) -> dict[str, Any]: ...
|
||||||
|
|
||||||
|
|
||||||
|
class RdflibEngine:
    """SPARQL engine that parses the TTL file into an in-memory rdflib Graph."""

    name = "rdflib"

    def __init__(self, *, ttl_path: str):
        self.ttl_path = ttl_path
        self.graph: Graph | None = None

    async def startup(self) -> None:
        """Parse the TTL file into a fresh graph (synchronous; runs at boot)."""
        parsed = Graph()
        parsed.parse(self.ttl_path, format="turtle")
        self.graph = parsed

    async def shutdown(self) -> None:
        """Nothing to close for an in-memory rdflib graph."""
        return None

    async def query_json(self, query: str) -> dict[str, Any]:
        """Run *query* against the in-memory graph; return a SPARQL-JSON dict."""
        if self.graph is None:
            raise RuntimeError("RdflibEngine not started")

        serialized = self.graph.query(query).serialize(format="json")
        if isinstance(serialized, bytes):
            serialized = serialized.decode("utf-8")
        return json.loads(serialized)
|
||||||
|
|
||||||
|
|
||||||
|
class AnzoGraphEngine:
    """SPARQL engine backed by a remote AnzoGraph HTTP endpoint."""

    name = "anzograph"

    def __init__(self, *, settings: Settings):
        self.endpoint = settings.effective_sparql_endpoint()
        self.timeout_s = settings.sparql_timeout_s
        self.ready_retries = settings.sparql_ready_retries
        self.ready_delay_s = settings.sparql_ready_delay_s

        self.user = settings.sparql_user
        self.password = settings.sparql_pass
        self.data_file = settings.sparql_data_file
        self.graph_iri = settings.sparql_graph_iri
        self.load_on_start = settings.sparql_load_on_start
        self.clear_on_start = settings.sparql_clear_on_start

        self._client: httpx.AsyncClient | None = None
        self._auth_header = self._build_auth_header(self.user, self.password)

    @staticmethod
    def _build_auth_header(user: str | None, password: str | None) -> str | None:
        """Basic-auth header value, or None when credentials are incomplete."""
        if not user or not password:
            return None
        token = base64.b64encode(f"{user}:{password}".encode("utf-8")).decode("ascii")
        return f"Basic {token}"

    def _headers(self, content_type: str, accept: str) -> dict[str, str]:
        """Common request headers, adding Basic auth when configured."""
        headers = {"Content-Type": content_type, "Accept": accept}
        if self._auth_header:
            headers["Authorization"] = self._auth_header
        return headers

    async def startup(self) -> None:
        """Open the HTTP client, wait for readiness, then run the optional
        CLEAR/LOAD maintenance selected via settings."""
        self._client = httpx.AsyncClient(timeout=self.timeout_s)

        await self._wait_ready()

        if self.clear_on_start:
            await self._update("CLEAR ALL")
            await self._wait_ready()

        if self.load_on_start:
            if not self.data_file:
                raise RuntimeError("SPARQL_LOAD_ON_START=true but SPARQL_DATA_FILE is not set")

            if self.graph_iri:
                stmt = f"LOAD <{self.data_file}> INTO GRAPH <{self.graph_iri}>"
            else:
                stmt = f"LOAD <{self.data_file}>"
            await self._update(stmt)

            # AnzoGraph may still be indexing after LOAD.
            await self._wait_ready()

    async def shutdown(self) -> None:
        """Close the HTTP client if it was opened."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def query_json(self, query: str) -> dict[str, Any]:
        """POST a SPARQL query and return the parsed JSON result set."""
        client = self._client
        if client is None:
            raise RuntimeError("AnzoGraphEngine not started")

        # AnzoGraph expects x-www-form-urlencoded with `query=...`.
        resp = await client.post(
            self.endpoint,
            headers=self._headers(
                "application/x-www-form-urlencoded",
                "application/sparql-results+json",
            ),
            data={"query": query},
        )
        resp.raise_for_status()
        return resp.json()

    async def _update(self, update: str) -> None:
        """POST a SPARQL UPDATE statement as the raw request body."""
        client = self._client
        if client is None:
            raise RuntimeError("AnzoGraphEngine not started")

        resp = await client.post(
            self.endpoint,
            headers=self._headers("application/sparql-update", "application/json"),
            content=update,
        )
        resp.raise_for_status()

    async def _wait_ready(self) -> None:
        """Poll with a cheap ASK query until the endpoint answers, or give up."""
        last_err: Exception | None = None
        for _ in range(self.ready_retries):
            try:
                # Keep it cheap and JSON-parseable.
                await self.query_json("ASK WHERE { ?s ?p ?o }")
                return
            except Exception as e:
                last_err = e
                await asyncio.sleep(self.ready_delay_s)
        raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err
|
||||||
|
|
||||||
|
|
||||||
|
def create_sparql_engine(settings: Settings) -> SparqlEngine:
    """Instantiate the engine selected by GRAPH_BACKEND."""
    backend = settings.graph_backend
    if backend == "rdflib":
        return RdflibEngine(ttl_path=settings.ttl_path)
    if backend == "anzograph":
        return AnzoGraphEngine(settings=settings)
    raise RuntimeError(f"Unsupported GRAPH_BACKEND={backend!r}")
|
||||||
5
backend/requirements.txt
Normal file
5
backend/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn[standard]
|
||||||
|
rdflib
|
||||||
|
pydantic-settings
|
||||||
|
httpx
|
||||||
@@ -1,9 +1,49 @@
|
|||||||
services:
|
services:
|
||||||
app:
|
backend:
|
||||||
build: .
|
build: ./backend
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
environment:
|
||||||
|
- GRAPH_BACKEND=${GRAPH_BACKEND:-rdflib}
|
||||||
|
- TTL_PATH=${TTL_PATH:-/data/o3po.ttl}
|
||||||
|
- INCLUDE_BNODES=${INCLUDE_BNODES:-false}
|
||||||
|
- MAX_TRIPLES
|
||||||
|
- CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:5173}
|
||||||
|
- SPARQL_HOST=${SPARQL_HOST:-http://anzograph:8080}
|
||||||
|
- SPARQL_ENDPOINT
|
||||||
|
- SPARQL_USER=${SPARQL_USER:-admin}
|
||||||
|
- SPARQL_PASS=${SPARQL_PASS:-Passw0rd1}
|
||||||
|
- SPARQL_DATA_FILE=${SPARQL_DATA_FILE:-file:///opt/shared-files/o3po.ttl}
|
||||||
|
- SPARQL_GRAPH_IRI
|
||||||
|
- SPARQL_LOAD_ON_START=${SPARQL_LOAD_ON_START:-false}
|
||||||
|
- SPARQL_CLEAR_ON_START=${SPARQL_CLEAR_ON_START:-false}
|
||||||
|
- SPARQL_TIMEOUT_S=${SPARQL_TIMEOUT_S:-300}
|
||||||
|
- SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30}
|
||||||
|
- SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4}
|
||||||
|
volumes:
|
||||||
|
- ./backend:/app
|
||||||
|
- ./data:/data:ro
|
||||||
|
command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
depends_on:
|
||||||
|
- anzograph
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
build: ./frontend
|
||||||
ports:
|
ports:
|
||||||
- "5173:5173"
|
- "5173:5173"
|
||||||
command: sh -c "npx tsx scripts/compute_layout.ts && npm run dev -- --host"
|
environment:
|
||||||
|
- VITE_BACKEND_URL=${VITE_BACKEND_URL:-http://backend:8000}
|
||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- ./frontend:/app
|
||||||
- /app/node_modules # Prevents local node_modules from overwriting the container's
|
- /app/node_modules
|
||||||
|
depends_on:
|
||||||
|
- backend
|
||||||
|
|
||||||
|
anzograph:
|
||||||
|
image: cambridgesemantics/anzograph:latest
|
||||||
|
container_name: anzograph
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
- "8443:8443"
|
||||||
|
volumes:
|
||||||
|
- ./data:/opt/shared-files:Z
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ FROM node:lts-alpine
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
EXPOSE 5173
|
||||||
|
|
||||||
# Copy dependency definitions
|
# Copy dependency definitions
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
|
|
||||||
@@ -11,8 +13,5 @@ RUN npm install
|
|||||||
# Copy the rest of the source code
|
# Copy the rest of the source code
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Expose the standard Vite port
|
# Start the dev server with --host for external access
|
||||||
EXPOSE 5173
|
CMD ["npm", "run", "dev", "--", "--host", "--port", "5173"]
|
||||||
|
|
||||||
# Compute layout, then start the dev server with --host for external access
|
|
||||||
CMD ["sh", "-c", "npm run layout && npm run dev -- --host"]
|
|
||||||
100000
frontend/public/edges.csv
Normal file
100000
frontend/public/edges.csv
Normal file
File diff suppressed because it is too large
Load Diff
100001
frontend/public/node_positions.csv
Normal file
100001
frontend/public/node_positions.csv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -16,6 +16,7 @@ export default function App() {
|
|||||||
const [error, setError] = useState("");
|
const [error, setError] = useState("");
|
||||||
const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number } | null>(null);
|
const [hoveredNode, setHoveredNode] = useState<{ x: number; y: number; screenX: number; screenY: number } | null>(null);
|
||||||
const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set());
|
const [selectedNodes, setSelectedNodes] = useState<Set<number>>(new Set());
|
||||||
|
const [backendStats, setBackendStats] = useState<{ nodes: number; edges: number; parsed_triples: number } | null>(null);
|
||||||
|
|
||||||
// Store mouse position in a ref so it can be accessed in render loop without re-renders
|
// Store mouse position in a ref so it can be accessed in render loop without re-renders
|
||||||
const mousePos = useRef({ x: 0, y: 0 });
|
const mousePos = useRef({ x: 0, y: 0 });
|
||||||
@@ -35,6 +36,19 @@ export default function App() {
|
|||||||
|
|
||||||
let cancelled = false;
|
let cancelled = false;
|
||||||
|
|
||||||
|
// Optional: fetch backend stats (proxied via Vite) so you can confirm backend is up.
|
||||||
|
fetch("/api/stats")
|
||||||
|
.then((r) => (r.ok ? r.json() : null))
|
||||||
|
.then((j) => {
|
||||||
|
if (!j || cancelled) return;
|
||||||
|
if (typeof j.nodes === "number" && typeof j.edges === "number" && typeof j.parsed_triples === "number") {
|
||||||
|
setBackendStats({ nodes: j.nodes, edges: j.edges, parsed_triples: j.parsed_triples });
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(() => {
|
||||||
|
// Backend is optional; ignore failures.
|
||||||
|
});
|
||||||
|
|
||||||
// Fetch CSVs, parse, and init renderer
|
// Fetch CSVs, parse, and init renderer
|
||||||
(async () => {
|
(async () => {
|
||||||
try {
|
try {
|
||||||
@@ -279,6 +293,11 @@ export default function App() {
|
|||||||
<div>Zoom: {stats.zoom < 0.01 ? stats.zoom.toExponential(2) : stats.zoom.toFixed(2)} px/unit</div>
|
<div>Zoom: {stats.zoom < 0.01 ? stats.zoom.toExponential(2) : stats.zoom.toFixed(2)} px/unit</div>
|
||||||
<div>Pt Size: {stats.ptSize.toFixed(1)}px</div>
|
<div>Pt Size: {stats.ptSize.toFixed(1)}px</div>
|
||||||
<div style={{ color: "#f80" }}>Selected: {selectedNodes.size}</div>
|
<div style={{ color: "#f80" }}>Selected: {selectedNodes.size}</div>
|
||||||
|
{backendStats && (
|
||||||
|
<div style={{ color: "#8f8" }}>
|
||||||
|
Backend: {backendStats.nodes.toLocaleString()} nodes, {backendStats.edges.toLocaleString()} edges
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
@@ -16,4 +16,10 @@ export default defineConfig({
|
|||||||
"@": path.resolve(__dirname, "src"),
|
"@": path.resolve(__dirname, "src"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
server: {
|
||||||
|
proxy: {
|
||||||
|
// Backend is reachable as http://backend:8000 inside docker-compose; localhost outside.
|
||||||
|
"/api": process.env.VITE_BACKEND_URL || "http://localhost:8000",
|
||||||
|
},
|
||||||
|
},
|
||||||
});
|
});
|
||||||
100000
public/edges.csv
100000
public/edges.csv
File diff suppressed because it is too large
Load Diff
100001
public/node_positions.csv
100001
public/node_positions.csv
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user