Files
visualizador_instanciados/backend/app/sparql_engine.py
2026-03-05 15:39:47 -03:00

143 lines
4.6 KiB
Python

from __future__ import annotations
import asyncio
import base64
from typing import Any, Protocol
import httpx
from .settings import Settings
class SparqlEngine(Protocol):
    """Structural interface shared by SPARQL backends in this module.

    Any object exposing a ``name`` attribute plus the three coroutines below
    satisfies this protocol; no inheritance is required.
    """

    # Short identifier of the backend implementation.
    name: str

    async def startup(self) -> None:
        """Prepare the engine so that queries can be issued."""

    async def shutdown(self) -> None:
        """Release the resources held by the engine."""

    async def query_json(self, query: str) -> dict[str, Any]:
        """Execute the SPARQL ``query`` and return the decoded JSON document."""
class AnzoGraphEngine:
    """SPARQL engine that talks to an AnzoGraph endpoint over HTTP.

    The engine owns a single ``httpx.AsyncClient`` created in ``startup()`` and
    closed in ``shutdown()``. All connection parameters are copied from the
    ``Settings`` object at construction time.
    """

    name = "anzograph"

    # Media types used on the wire.
    _FORM_CONTENT_TYPE = "application/x-www-form-urlencoded"
    _RESULTS_JSON = "application/sparql-results+json"

    def __init__(self, *, settings: Settings):
        """Copy endpoint, timeout, auth and bootstrap options from ``settings``."""
        self.endpoint = settings.effective_sparql_endpoint()
        self.timeout_s = settings.sparql_timeout_s
        self.ready_retries = settings.sparql_ready_retries
        self.ready_delay_s = settings.sparql_ready_delay_s
        self.ready_timeout_s = settings.sparql_ready_timeout_s
        self.user = settings.sparql_user
        self.password = settings.sparql_pass
        self.data_file = settings.sparql_data_file
        self.graph_iri = settings.sparql_graph_iri
        self.load_on_start = settings.sparql_load_on_start
        self.clear_on_start = settings.sparql_clear_on_start
        # Created lazily in startup(); None means "not started".
        self._client: httpx.AsyncClient | None = None
        self._auth_header = self._build_auth_header(self.user, self.password)

    @staticmethod
    def _build_auth_header(user: str | None, password: str | None) -> str | None:
        """Return an HTTP Basic ``Authorization`` value, or None.

        None is returned when either ``user`` or ``password`` is missing or
        empty, in which case requests are sent without authentication.
        """
        if not user or not password:
            return None
        token = base64.b64encode(f"{user}:{password}".encode("utf-8")).decode("ascii")
        return f"Basic {token}"

    def _require_client(self) -> httpx.AsyncClient:
        """Return the live HTTP client or raise if ``startup()`` has not run."""
        if self._client is None:
            raise RuntimeError("AnzoGraphEngine not started")
        return self._client

    def _headers(self, content_type: str, accept: str) -> dict[str, str]:
        """Build request headers, adding ``Authorization`` when configured."""
        headers = {"Content-Type": content_type, "Accept": accept}
        if self._auth_header:
            headers["Authorization"] = self._auth_header
        return headers

    async def startup(self) -> None:
        """Open the HTTP client, wait for the server and optionally seed it.

        Raises:
            RuntimeError: if loading is requested without a data file, or if
                the endpoint never becomes ready.
        """
        # Validate configuration before any network work so that a bad setup
        # fails immediately instead of after CLEAR ALL has wiped the store.
        if self.load_on_start and not self.data_file:
            raise RuntimeError("SPARQL_LOAD_ON_START=true but SPARQL_DATA_FILE is not set")

        self._client = httpx.AsyncClient(timeout=self.timeout_s)
        try:
            await self._wait_ready()

            if self.clear_on_start:
                await self._update("CLEAR ALL")
                await self._wait_ready()

            if self.load_on_start:
                if self.graph_iri:
                    await self._update(f"LOAD <{self.data_file}> INTO GRAPH <{self.graph_iri}>")
                else:
                    await self._update(f"LOAD <{self.data_file}>")
                # AnzoGraph may still be indexing after LOAD.
                await self._wait_ready()
        except BaseException:
            # Do not leak the connection pool when startup is aborted
            # (including cancellation); the original error is re-raised.
            await self.shutdown()
            raise

    async def shutdown(self) -> None:
        """Close the HTTP client if one is open; safe to call repeatedly."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def query_json(self, query: str) -> dict[str, Any]:
        """POST ``query`` to the endpoint and return the decoded JSON body.

        Raises:
            RuntimeError: if the engine has not been started.
            httpx.HTTPStatusError: on a 4xx/5xx response.
        """
        client = self._require_client()
        # AnzoGraph expects x-www-form-urlencoded with `query=...`.
        resp = await client.post(
            self.endpoint,
            headers=self._headers(self._FORM_CONTENT_TYPE, self._RESULTS_JSON),
            data={"query": query},
        )
        resp.raise_for_status()
        return resp.json()

    async def _update(self, update: str) -> None:
        """POST a SPARQL update string as the raw request body.

        Raises:
            RuntimeError: if the engine has not been started.
            httpx.HTTPStatusError: on a 4xx/5xx response.
        """
        client = self._require_client()
        resp = await client.post(
            self.endpoint,
            headers=self._headers("application/sparql-update", "application/json"),
            content=update,
        )
        resp.raise_for_status()

    async def _wait_ready(self) -> None:
        """Poll the endpoint until it answers a trivial ASK query with JSON.

        Up to ``ready_retries`` attempts are made, pausing ``ready_delay_s``
        seconds between consecutive attempts (not after the last one).

        Raises:
            RuntimeError: if the engine has not been started, or if no attempt
                succeeded; the last failure is chained as ``__cause__``.
        """
        client = self._require_client()
        # Match the repo's Julia readiness gate: real SPARQL POST + valid JSON parse.
        headers = self._headers(self._FORM_CONTENT_TYPE, self._RESULTS_JSON)

        last_err: Exception | None = None
        for attempt in range(self.ready_retries):
            if attempt:
                # Pause only between attempts; sleeping after the final
                # failure would merely delay the error.
                await asyncio.sleep(self.ready_delay_s)
            try:
                resp = await client.post(
                    self.endpoint,
                    headers=headers,
                    data={"query": "ASK WHERE { ?s ?p ?o }"},
                    timeout=self.ready_timeout_s,
                )
                resp.raise_for_status()
                # Ensure it's JSON, not HTML/text during boot.
                resp.json()
            except Exception as e:
                # Any failure (connection, HTTP status, bad JSON) means
                # "not ready yet"; remember it for the final error.
                last_err = e
            else:
                return
        raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err
def create_sparql_engine(settings: Settings) -> SparqlEngine:
    """Build the SPARQL engine configured by ``settings``.

    AnzoGraph is the only backend constructed here; the instance is returned
    un-started, so the caller is responsible for awaiting ``startup()``.
    """
    engine: SparqlEngine = AnzoGraphEngine(settings=settings)
    return engine