# SPARQL engine backends: an in-process rdflib graph and a remote AnzoGraph
# endpoint, both exposed through the same small async interface.
from __future__ import annotations

import asyncio
import base64
import json
from typing import Any, Protocol

import httpx
from rdflib import Graph

from .settings import Settings


class SparqlEngine(Protocol):
    """Structural interface every SPARQL backend in this module implements."""

    name: str

    async def startup(self) -> None:
        """Prepare the backend so that ``query_json`` can be called."""
        ...

    async def shutdown(self) -> None:
        """Release whatever ``startup`` acquired."""
        ...

    async def query_json(self, query: str) -> dict[str, Any]:
        """Run *query* and return the parsed JSON result document."""
        ...


class RdflibEngine:
    """SPARQL backend that evaluates queries against an in-memory rdflib graph."""

    name = "rdflib"

    def __init__(self, *, ttl_path: str, graph: Graph | None = None):
        """Store the Turtle path and, optionally, an already-built graph.

        Args:
            ttl_path: Path handed to ``Graph.parse`` when no graph was supplied.
            graph: Pre-loaded graph; when given, ``startup`` does not parse.
        """
        self.ttl_path = ttl_path
        self.graph: Graph | None = graph

    async def startup(self) -> None:
        """Parse ``ttl_path`` as Turtle unless a graph is already present."""
        if self.graph is not None:
            return
        # NOTE: Graph.parse is a synchronous call; it runs inline in this
        # coroutine rather than being offloaded.
        g = Graph()
        g.parse(self.ttl_path, format="turtle")
        self.graph = g

    async def shutdown(self) -> None:
        """Do nothing: the in-memory rdflib graph holds nothing to close."""
        return None

    async def query_json(self, query: str) -> dict[str, Any]:
        """Run *query* on the graph and return its JSON serialization, parsed.

        Raises:
            RuntimeError: If no graph is available (``startup`` not run and no
                graph passed to the constructor).
        """
        if self.graph is None:
            raise RuntimeError("RdflibEngine not started")

        # NOTE: Graph.query is synchronous as well and executes inline here.
        result = self.graph.query(query)
        payload = result.serialize(format="json")
        # serialize() may hand back bytes; json.loads is fed text either way.
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8")
        return json.loads(payload)


class AnzoGraphEngine:
    """SPARQL backend that talks to an AnzoGraph HTTP endpoint via httpx."""

    name = "anzograph"

    def __init__(self, *, settings: Settings):
        """Copy the connection, readiness and bootstrap options from *settings*.

        No network activity happens here; the HTTP client is created in
        ``startup``.
        """
        self.endpoint = settings.effective_sparql_endpoint()
        self.timeout_s = settings.sparql_timeout_s
        self.ready_retries = settings.sparql_ready_retries
        self.ready_delay_s = settings.sparql_ready_delay_s
        self.ready_timeout_s = settings.sparql_ready_timeout_s
        self.user = settings.sparql_user
        self.password = settings.sparql_pass
        self.data_file = settings.sparql_data_file
        self.graph_iri = settings.sparql_graph_iri
        self.load_on_start = settings.sparql_load_on_start
        self.clear_on_start = settings.sparql_clear_on_start
        self._client: httpx.AsyncClient | None = None
        self._auth_header = self._build_auth_header(self.user, self.password)

    @staticmethod
    def _build_auth_header(user: str | None, password: str | None) -> str | None:
        """Return a ``Basic`` Authorization value, or ``None`` without credentials.

        Both *user* and *password* must be non-empty for a header to be built.
        """
        if not user or not password:
            return None
        token = base64.b64encode(f"{user}:{password}".encode("utf-8")).decode("ascii")
        return f"Basic {token}"

    def _headers(self, content_type: str, accept: str) -> dict[str, str]:
        """Build request headers, adding Authorization when credentials exist."""
        headers = {"Content-Type": content_type, "Accept": accept}
        if self._auth_header:
            headers["Authorization"] = self._auth_header
        return headers

    async def startup(self) -> None:
        """Open the HTTP client, wait for the endpoint, then optionally clear/load.

        Raises:
            RuntimeError: If loading is requested without a data file, or the
                endpoint never becomes ready.
            httpx.HTTPStatusError: If a CLEAR or LOAD update is rejected.
        """
        # Validate configuration before touching the server, so a missing data
        # file cannot leave the store cleared by a startup that was never going
        # to succeed.
        if self.load_on_start and not self.data_file:
            raise RuntimeError("SPARQL_LOAD_ON_START=true but SPARQL_DATA_FILE is not set")

        self._client = httpx.AsyncClient(timeout=self.timeout_s)
        try:
            await self._wait_ready()

            if self.clear_on_start:
                await self._update("CLEAR ALL")
                await self._wait_ready()

            if self.load_on_start:
                if self.graph_iri:
                    await self._update(
                        f"LOAD <{self.data_file}> INTO GRAPH <{self.graph_iri}>"
                    )
                else:
                    await self._update(f"LOAD <{self.data_file}>")

                # AnzoGraph may still be indexing after LOAD.
                await self._wait_ready()
        except BaseException:
            # A failed (or cancelled) startup must not leave the client open:
            # callers typically skip shutdown() when startup() raised.
            await self.shutdown()
            raise

    async def shutdown(self) -> None:
        """Close the HTTP client if one is open; safe to call repeatedly."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def query_json(self, query: str) -> dict[str, Any]:
        """POST *query* to the endpoint and return the decoded JSON response.

        Raises:
            RuntimeError: If ``startup`` has not been run.
            httpx.HTTPStatusError: If the endpoint answers with an error status.
        """
        if self._client is None:
            raise RuntimeError("AnzoGraphEngine not started")

        # AnzoGraph expects x-www-form-urlencoded with `query=...`.
        resp = await self._client.post(
            self.endpoint,
            headers=self._headers(
                "application/x-www-form-urlencoded",
                "application/sparql-results+json",
            ),
            data={"query": query},
        )
        resp.raise_for_status()
        return resp.json()

    async def _update(self, update: str) -> None:
        """POST *update* as an ``application/sparql-update`` request body.

        Raises:
            RuntimeError: If ``startup`` has not been run.
            httpx.HTTPStatusError: If the endpoint answers with an error status.
        """
        if self._client is None:
            raise RuntimeError("AnzoGraphEngine not started")

        resp = await self._client.post(
            self.endpoint,
            headers=self._headers("application/sparql-update", "application/json"),
            content=update,
        )
        resp.raise_for_status()

    async def _wait_ready(self) -> None:
        """Probe the endpoint until an ASK query returns a parseable JSON reply.

        Makes up to ``ready_retries`` attempts, pausing ``ready_delay_s``
        between consecutive attempts (not after the last one).

        Raises:
            RuntimeError: If ``startup`` has not been run, or no attempt
                succeeded; the last probe failure is chained as the cause.
        """
        if self._client is None:
            raise RuntimeError("AnzoGraphEngine not started")

        # Match the repo's Julia readiness gate: real SPARQL POST + valid JSON parse.
        headers = self._headers(
            "application/x-www-form-urlencoded",
            "application/sparql-results+json",
        )

        last_err: Exception | None = None
        for attempt in range(self.ready_retries):
            if attempt:
                # Pause only between attempts; sleeping after the final failure
                # would just delay the error below.
                await asyncio.sleep(self.ready_delay_s)
            try:
                resp = await self._client.post(
                    self.endpoint,
                    headers=headers,
                    data={"query": "ASK WHERE { ?s ?p ?o }"},
                    timeout=self.ready_timeout_s,
                )
                resp.raise_for_status()
                # Ensure it's JSON, not HTML/text during boot.
                resp.json()
                return
            except Exception as e:
                # Deliberately broad: any failure of the probe (connection,
                # HTTP status, non-JSON body) simply means "not ready yet".
                last_err = e

        raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err


def create_sparql_engine(settings: Settings, *, rdflib_graph: Graph | None = None) -> SparqlEngine:
    """Instantiate the backend selected by ``settings.graph_backend``.

    Args:
        settings: Application settings; ``graph_backend`` picks the engine.
        rdflib_graph: Optional pre-built graph, used only by the rdflib backend.

    Raises:
        RuntimeError: If ``graph_backend`` is neither ``"rdflib"`` nor
            ``"anzograph"``.
    """
    if settings.graph_backend == "rdflib":
        return RdflibEngine(ttl_path=settings.ttl_path, graph=rdflib_graph)

    if settings.graph_backend == "anzograph":
        return AnzoGraphEngine(settings=settings)

    raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}")