Graph access via SPARQL
This commit is contained in:
94
backend/app/graph_export.py
Normal file
94
backend/app/graph_export.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def edge_retrieval_query(*, edge_limit: int, include_bnodes: bool) -> str:
    """Build the SPARQL SELECT used to sample edges from the store.

    The query keeps only triples whose object is a resource (literals are
    filtered out), excludes pure labelling predicates (rdfs:label and the
    SKOS pref/alt labels), optionally drops blank nodes on either end, and
    caps the result at *edge_limit* rows.
    """
    if include_bnodes:
        bnode_filter = ""
    else:
        bnode_filter = "FILTER(!isBlank(?s) && !isBlank(?o))"
    return f"""
SELECT ?s ?p ?o
WHERE {{
    ?s ?p ?o .
    FILTER(!isLiteral(?o))
    FILTER(?p NOT IN (
        <http://www.w3.org/2000/01/rdf-schema#label>,
        <http://www.w3.org/2004/02/skos/core#prefLabel>,
        <http://www.w3.org/2004/02/skos/core#altLabel>
    ))
    {bnode_filter}
}}
LIMIT {edge_limit}
"""
|
||||||
|
|
||||||
|
|
||||||
|
def graph_from_sparql_bindings(
    bindings: list[dict[str, Any]],
    *,
    node_limit: int,
    include_bnodes: bool,
) -> tuple[list[dict[str, object]], list[dict[str, object]]]:
    """
    Convert SPARQL JSON results bindings into:
        nodes: [{id, termType, iri, label}]
        edges: [{source, target, predicate}]

    IDs are assigned densely (0..N-1) based on first occurrence in bindings.

    Bindings are skipped when the predicate is missing/empty, when either
    endpoint is a literal (or a blank node while include_bnodes is False),
    or when registering an endpoint would exceed node_limit.
    """

    node_id_by_key: dict[tuple[str, str], int] = {}
    node_meta: list[tuple[str, str]] = []  # (termType, iri)
    out_edges: list[dict[str, object]] = []

    def term_to_key_and_iri(term: dict[str, Any]) -> tuple[tuple[str, str], tuple[str, str]] | None:
        # Map a SPARQL JSON term to (dedup key, (termType, display iri)),
        # or None when the term cannot become a node.
        t = term.get("type")
        v = term.get("value")
        if not t or v is None:
            return None
        if t == "literal":
            return None
        if t == "bnode":
            if not include_bnodes:
                return None
            # SPARQL JSON uses bnode identifiers without the "_:" prefix; we normalize to "_:id".
            return (("bnode", str(v)), ("bnode", f"_:{v}"))
        # Default to "uri".
        return (("uri", str(v)), ("uri", str(v)))

    def get_or_add(term: dict[str, Any]) -> int | None:
        # Dense ID assignment, hard-capped at node_limit.
        out = term_to_key_and_iri(term)
        if out is None:
            return None
        key, meta = out
        existing = node_id_by_key.get(key)
        if existing is not None:
            return existing
        if len(node_meta) >= node_limit:
            return None
        nid = len(node_meta)
        node_id_by_key[key] = nid
        node_meta.append(meta)
        return nid

    for b in bindings:
        s_term = b.get("s") or {}
        o_term = b.get("o") or {}
        p_term = b.get("p") or {}

        # Validate the predicate BEFORE registering nodes: otherwise a binding
        # that is then discarded for lacking a predicate would still consume
        # node_limit budget and leave orphan nodes in the output.
        pred = p_term.get("value")
        if not pred:
            continue

        sid = get_or_add(s_term)
        oid = get_or_add(o_term)
        if sid is None or oid is None:
            continue

        out_edges.append({"source": sid, "target": oid, "predicate": str(pred)})

    out_nodes = [
        {"id": i, "termType": term_type, "iri": iri, "label": None}
        for i, (term_type, iri) in enumerate(node_meta)
    ]

    return out_nodes, out_edges
|
||||||
|
|
||||||
@@ -5,6 +5,7 @@ from contextlib import asynccontextmanager
|
|||||||
from fastapi import FastAPI, HTTPException, Query
|
from fastapi import FastAPI, HTTPException, Query
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
|
from .graph_export import edge_retrieval_query, graph_from_sparql_bindings
|
||||||
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
from .models import EdgesResponse, GraphResponse, NodesResponse, SparqlQueryRequest, StatsResponse
|
||||||
from .rdf_store import RDFStore
|
from .rdf_store import RDFStore
|
||||||
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
|
from .sparql_engine import AnzoGraphEngine, RdflibEngine, SparqlEngine, create_sparql_engine
|
||||||
@@ -161,87 +162,13 @@ async def graph(
|
|||||||
) -> GraphResponse:
|
) -> GraphResponse:
|
||||||
sparql: SparqlEngine = app.state.sparql
|
sparql: SparqlEngine = app.state.sparql
|
||||||
|
|
||||||
if settings.graph_backend == "rdflib":
|
# Use SPARQL for graph export in BOTH modes so callers don't care which backend is in use.
|
||||||
store: RDFStore = app.state.store
|
edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes)
|
||||||
return GraphResponse(
|
|
||||||
nodes=store.node_slice(offset=0, limit=node_limit),
|
|
||||||
edges=store.edge_slice(offset=0, limit=edge_limit),
|
|
||||||
)
|
|
||||||
|
|
||||||
# AnzoGraph mode: return a simple subgraph by pulling the first N triples.
|
|
||||||
assert isinstance(sparql, AnzoGraphEngine)
|
|
||||||
|
|
||||||
edges_bnode_filter = "" if settings.include_bnodes else "FILTER(!isBlank(?s) && !isBlank(?o))"
|
|
||||||
edges_q = f"""
|
|
||||||
SELECT ?s ?p ?o
|
|
||||||
WHERE {{
|
|
||||||
?s ?p ?o .
|
|
||||||
FILTER(!isLiteral(?o))
|
|
||||||
FILTER(?p NOT IN (
|
|
||||||
<http://www.w3.org/2000/01/rdf-schema#label>,
|
|
||||||
<http://www.w3.org/2004/02/skos/core#prefLabel>,
|
|
||||||
<http://www.w3.org/2004/02/skos/core#altLabel>
|
|
||||||
))
|
|
||||||
{edges_bnode_filter}
|
|
||||||
}}
|
|
||||||
LIMIT {edge_limit}
|
|
||||||
"""
|
|
||||||
|
|
||||||
res = await sparql.query_json(edges_q)
|
res = await sparql.query_json(edges_q)
|
||||||
bindings = (((res.get("results") or {}).get("bindings")) or [])
|
bindings = (((res.get("results") or {}).get("bindings")) or [])
|
||||||
|
nodes, edges = graph_from_sparql_bindings(
|
||||||
node_id_by_key: dict[tuple[str, str], int] = {}
|
bindings,
|
||||||
node_meta: list[tuple[str, str]] = [] # (termType, iri)
|
node_limit=node_limit,
|
||||||
out_edges: list[dict[str, object]] = []
|
include_bnodes=settings.include_bnodes,
|
||||||
|
)
|
||||||
def _term_to_key_and_iri(term: dict[str, str]) -> tuple[tuple[str, str], tuple[str, str]] | None:
|
return GraphResponse(nodes=nodes, edges=edges)
|
||||||
t = term.get("type")
|
|
||||||
v = term.get("value")
|
|
||||||
if not t or v is None:
|
|
||||||
return None
|
|
||||||
if t == "literal":
|
|
||||||
return None
|
|
||||||
if t == "bnode" and not settings.include_bnodes:
|
|
||||||
return None
|
|
||||||
if t == "bnode":
|
|
||||||
return (("bnode", v), ("bnode", f"_:{v}"))
|
|
||||||
# Default to "uri".
|
|
||||||
return (("uri", v), ("uri", v))
|
|
||||||
|
|
||||||
def _get_or_add(term: dict[str, str]) -> int | None:
|
|
||||||
out = _term_to_key_and_iri(term)
|
|
||||||
if out is None:
|
|
||||||
return None
|
|
||||||
key, meta = out
|
|
||||||
existing = node_id_by_key.get(key)
|
|
||||||
if existing is not None:
|
|
||||||
return existing
|
|
||||||
if len(node_meta) >= node_limit:
|
|
||||||
return None
|
|
||||||
nid = len(node_meta)
|
|
||||||
node_id_by_key[key] = nid
|
|
||||||
node_meta.append(meta)
|
|
||||||
return nid
|
|
||||||
|
|
||||||
for b in bindings:
|
|
||||||
s_term = b.get("s") or {}
|
|
||||||
o_term = b.get("o") or {}
|
|
||||||
p_term = b.get("p") or {}
|
|
||||||
|
|
||||||
sid = _get_or_add(s_term)
|
|
||||||
oid = _get_or_add(o_term)
|
|
||||||
if sid is None or oid is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
pred = p_term.get("value")
|
|
||||||
if not pred:
|
|
||||||
continue
|
|
||||||
|
|
||||||
out_edges.append({"source": sid, "target": oid, "predicate": pred})
|
|
||||||
|
|
||||||
out_nodes = [
|
|
||||||
{"id": i, "termType": term_type, "iri": iri, "label": None}
|
|
||||||
for i, (term_type, iri) in enumerate(node_meta)
|
|
||||||
]
|
|
||||||
|
|
||||||
return GraphResponse(nodes=out_nodes, edges=out_edges)
|
|
||||||
|
|||||||
1
backend/app/pipelines/__init__.py
Normal file
1
backend/app/pipelines/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
153
backend/app/pipelines/subclass_labels.py
Normal file
153
backend/app/pipelines/subclass_labels.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from ..sparql_engine import SparqlEngine
|
||||||
|
|
||||||
|
RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
|
||||||
|
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
||||||
|
|
||||||
|
|
||||||
|
def _bindings(res: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
return (((res.get("results") or {}).get("bindings")) or [])
|
||||||
|
|
||||||
|
|
||||||
|
def _term_key(term: dict[str, Any]) -> tuple[str, str] | None:
|
||||||
|
t = term.get("type")
|
||||||
|
v = term.get("value")
|
||||||
|
if not t or v is None:
|
||||||
|
return None
|
||||||
|
if t == "literal":
|
||||||
|
return None
|
||||||
|
if t == "bnode":
|
||||||
|
return ("bnode", str(v))
|
||||||
|
return ("uri", str(v))
|
||||||
|
|
||||||
|
|
||||||
|
def _key_to_entity_string(key: tuple[str, str]) -> str:
|
||||||
|
t, v = key
|
||||||
|
if t == "bnode":
|
||||||
|
return f"_:{v}"
|
||||||
|
return v
|
||||||
|
|
||||||
|
|
||||||
|
def _label_score(binding: dict[str, Any]) -> int:
|
||||||
|
"""
|
||||||
|
Higher is better.
|
||||||
|
Prefer English, then no-language, then anything else.
|
||||||
|
"""
|
||||||
|
lang = (binding.get("xml:lang") or "").lower()
|
||||||
|
if lang == "en":
|
||||||
|
return 3
|
||||||
|
if lang == "":
|
||||||
|
return 2
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_subclass_entities_and_labels(
    sparql: SparqlEngine,
    *,
    include_bnodes: bool,
    label_batch_size: int = 500,
) -> tuple[list[str], list[str | None]]:
    """
    Pipeline:
        1) Query all rdfs:subClassOf triples.
        2) Build a unique set of entity terms from subjects+objects, convert to list.
        3) Fetch rdfs:label for those entities and return an aligned labels list.

    Args:
        sparql: engine exposing an async ``query_json(query) -> dict`` in
            SPARQL JSON results format.
        include_bnodes: when False, blank nodes are filtered out both in the
            query and again locally when building the entity set.
        label_batch_size: number of URIs packed into each VALUES clause when
            fetching labels.

    Returns:
        entities: list[str] (IRI or "_:bnodeId")
        labels: list[str|None], aligned with entities
    """

    # Step 1: pull every subclass edge; literal objects are excluded, and
    # blank nodes are optionally excluded via an extra FILTER.
    subclass_q = f"""
SELECT ?s ?o
WHERE {{
    ?s <{RDFS_SUBCLASS_OF}> ?o .
    FILTER(!isLiteral(?o))
    {"FILTER(!isBlank(?s) && !isBlank(?o))" if not include_bnodes else ""}
}}
"""
    res = await sparql.query_json(subclass_q)

    # Step 2: dedupe subjects and objects into a set of (termType, value) keys.
    # The bnode check here is belt-and-braces on top of the query-level FILTER.
    entity_keys: set[tuple[str, str]] = set()
    for b in _bindings(res):
        sk = _term_key(b.get("s") or {})
        ok = _term_key(b.get("o") or {})
        if sk is not None and (include_bnodes or sk[0] != "bnode"):
            entity_keys.add(sk)
        if ok is not None and (include_bnodes or ok[0] != "bnode"):
            entity_keys.add(ok)

    # Deterministic ordering.
    entity_key_list = sorted(entity_keys, key=lambda k: (k[0], k[1]))
    entities = [_key_to_entity_string(k) for k in entity_key_list]

    # Build label map keyed by term key.
    # Value is (score, label); a higher-scoring label replaces a lower one.
    best_label_by_key: dict[tuple[str, str], tuple[int, str]] = {}

    # URIs can be batch-queried via VALUES.
    uri_values = [v for (t, v) in entity_key_list if t == "uri"]
    for i in range(0, len(uri_values), label_batch_size):
        batch = uri_values[i : i + label_batch_size]
        values = " ".join(f"<{u}>" for u in batch)
        labels_q = f"""
SELECT ?s ?label
WHERE {{
    VALUES ?s {{ {values} }}
    ?s <{RDFS_LABEL}> ?label .
}}
"""
        lres = await sparql.query_json(labels_q)
        for b in _bindings(lres):
            sk = _term_key(b.get("s") or {})
            if sk is None or sk[0] != "uri":
                continue
            label_term = b.get("label") or {}
            # Only literal labels count; anything else is ignored.
            if label_term.get("type") != "literal":
                continue
            label_value = label_term.get("value")
            if label_value is None:
                continue

            # Keep the best-scoring label (English > untagged > other).
            score = _label_score(label_term)
            prev = best_label_by_key.get(sk)
            if prev is None or score > prev[0]:
                best_label_by_key[sk] = (score, str(label_value))

    # Blank nodes can't reliably be addressed by ID across queries, but if enabled we can still
    # fetch all bnode labels and filter locally.
    if include_bnodes:
        bnode_keys = {k for k in entity_key_list if k[0] == "bnode"}
        if bnode_keys:
            bnode_labels_q = f"""
SELECT ?s ?label
WHERE {{
    ?s <{RDFS_LABEL}> ?label .
    FILTER(isBlank(?s))
}}
"""
            blres = await sparql.query_json(bnode_labels_q)
            for b in _bindings(blres):
                sk = _term_key(b.get("s") or {})
                # NOTE(review): this assumes the endpoint reuses the same
                # bnode identifiers across queries — confirm for the target
                # store; otherwise labels for bnodes will simply not match.
                if sk is None or sk not in bnode_keys:
                    continue
                label_term = b.get("label") or {}
                if label_term.get("type") != "literal":
                    continue
                label_value = label_term.get("value")
                if label_value is None:
                    continue
                score = _label_score(label_term)
                prev = best_label_by_key.get(sk)
                if prev is None or score > prev[0]:
                    best_label_by_key[sk] = (score, str(label_value))

    # Step 3: emit labels aligned index-for-index with `entities`.
    labels: list[str | None] = []
    for k in entity_key_list:
        item = best_label_by_key.get(k)
        labels.append(item[1] if item else None)

    return entities, labels
|
||||||
|
|
||||||
@@ -132,3 +132,19 @@ class RDFStore:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def edges_within_nodes(self, *, max_node_id_exclusive: int, limit: int) -> list[dict[str, Any]]:
    """Return up to *limit* edges whose endpoints both have ids below the bound.

    Edges touching any node id >= max_node_id_exclusive are skipped; order
    follows the internal edge list.

    Args:
        max_node_id_exclusive: exclusive upper bound on both endpoint ids.
        limit: maximum number of edges to return.
    """
    # Guard: the append-then-check loop below would otherwise return one
    # edge even when limit <= 0.
    if limit <= 0:
        return []
    out: list[dict[str, Any]] = []
    for row in self._edges:
        if row.source >= max_node_id_exclusive or row.target >= max_node_id_exclusive:
            continue
        out.append(
            {
                "source": row.source,
                "target": row.target,
                "predicate": row.predicate,
            }
        )
        if len(out) >= limit:
            break
    return out
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ class Settings(BaseSettings):
|
|||||||
sparql_timeout_s: float = Field(default=300.0, alias="SPARQL_TIMEOUT_S")
|
sparql_timeout_s: float = Field(default=300.0, alias="SPARQL_TIMEOUT_S")
|
||||||
sparql_ready_retries: int = Field(default=30, alias="SPARQL_READY_RETRIES")
|
sparql_ready_retries: int = Field(default=30, alias="SPARQL_READY_RETRIES")
|
||||||
sparql_ready_delay_s: float = Field(default=4.0, alias="SPARQL_READY_DELAY_S")
|
sparql_ready_delay_s: float = Field(default=4.0, alias="SPARQL_READY_DELAY_S")
|
||||||
|
sparql_ready_timeout_s: float = Field(default=10.0, alias="SPARQL_READY_TIMEOUT_S")
|
||||||
|
|
||||||
# Comma-separated, or "*" (default).
|
# Comma-separated, or "*" (default).
|
||||||
cors_origins: str = Field(default="*", alias="CORS_ORIGINS")
|
cors_origins: str = Field(default="*", alias="CORS_ORIGINS")
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ class AnzoGraphEngine:
|
|||||||
self.timeout_s = settings.sparql_timeout_s
|
self.timeout_s = settings.sparql_timeout_s
|
||||||
self.ready_retries = settings.sparql_ready_retries
|
self.ready_retries = settings.sparql_ready_retries
|
||||||
self.ready_delay_s = settings.sparql_ready_delay_s
|
self.ready_delay_s = settings.sparql_ready_delay_s
|
||||||
|
self.ready_timeout_s = settings.sparql_ready_timeout_s
|
||||||
|
|
||||||
self.user = settings.sparql_user
|
self.user = settings.sparql_user
|
||||||
self.password = settings.sparql_pass
|
self.password = settings.sparql_pass
|
||||||
@@ -135,15 +136,34 @@ class AnzoGraphEngine:
|
|||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
async def _wait_ready(self) -> None:
    """Block until the AnzoGraph SPARQL endpoint answers a real query with JSON.

    Retries up to ``self.ready_retries`` times, sleeping ``self.ready_delay_s``
    between attempts. Raises RuntimeError if ``start`` was never called, or
    (chained to the last underlying error) if the endpoint never becomes ready.
    """
    if self._client is None:
        raise RuntimeError("AnzoGraphEngine not started")

    # Match the repo's Julia readiness gate: real SPARQL POST + valid JSON parse.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/sparql-results+json",
    }
    if self._auth_header:
        headers["Authorization"] = self._auth_header

    last_err: Exception | None = None
    for _ in range(self.ready_retries):
        try:
            # Keep it cheap and JSON-parseable.
            resp = await self._client.post(
                self.endpoint,
                headers=headers,
                data={"query": "ASK WHERE { ?s ?p ?o }"},
                timeout=self.ready_timeout_s,
            )
            resp.raise_for_status()
            # Ensure it's JSON, not HTML/text during boot.
            resp.json()
            return
        except Exception as e:
            last_err = e
            await asyncio.sleep(self.ready_delay_s)

    raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
371
docs/anzograph-readiness-julia.md
Normal file
371
docs/anzograph-readiness-julia.md
Normal file
@@ -0,0 +1,371 @@
|
|||||||
|
# Waiting for AnzoGraph readiness from Julia (how this repo does it)
|
||||||
|
|
||||||
|
This repo runs a Julia pipeline (`julia/main.jl`) against an AnzoGraph SPARQL endpoint. The key problem is that **“container started” ≠ “SPARQL endpoint is ready to accept queries”**.
|
||||||
|
|
||||||
|
So, before the Julia code does anything that depends on SPARQL (like `LOAD <...>` or large `SELECT`s), it explicitly **waits until AnzoGraph is actually responding to a real SPARQL POST request with valid JSON results**.
|
||||||
|
|
||||||
|
This document explains the exact mechanism used here, why it works, and gives copy/paste-ready patterns you can transfer to another project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1) Where the waiting happens (pipeline control flow)
|
||||||
|
|
||||||
|
In `julia/main.jl`, the entrypoint calls:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
# Step 1: Wait for AnzoGraph
|
||||||
|
wait_for_anzograph()
|
||||||
|
|
||||||
|
# Step 2: Load TTL file
|
||||||
|
result = sparql_update("LOAD <$SPARQL_DATA_FILE>")
|
||||||
|
```
|
||||||
|
|
||||||
|
So the “await” is not a Julia `Task`/`async` wait; it is a **blocking retry loop** that only returns when it can successfully execute a small SPARQL query.
|
||||||
|
|
||||||
|
Reference: `julia/main.jl` defines `wait_for_anzograph()` and calls it from `main()`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2) Why this is needed even with Docker Compose `depends_on`
|
||||||
|
|
||||||
|
This repo’s `docker-compose.yml` includes an AnzoGraph `healthcheck`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
anzograph:
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -f http://localhost:8080/sparql || exit 1"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 30
|
||||||
|
start_period: 60s
|
||||||
|
```
|
||||||
|
|
||||||
|
However, `julia-layout` currently depends on `anzograph` with:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
depends_on:
|
||||||
|
anzograph:
|
||||||
|
condition: service_started
|
||||||
|
```
|
||||||
|
|
||||||
|
Meaning:
|
||||||
|
- Compose will ensure the **container process has started**.
|
||||||
|
- Compose does **not** guarantee the AnzoGraph HTTP/SPARQL endpoint is ready (unless you use `service_healthy`, and even then a “healthy GET” is not always equivalent to “SPARQL POST works with auth + JSON”).
|
||||||
|
|
||||||
|
So the Julia code includes its own readiness gate to prevent failures like:
|
||||||
|
- TCP connection refused (port not open yet)
|
||||||
|
- HTTP endpoint reachable but not fully initialized
|
||||||
|
- Non-JSON/HTML error responses while the service is still booting
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3) What “ready” means in this repo
|
||||||
|
|
||||||
|
In this repo, “AnzoGraph is ready” means:
|
||||||
|
|
||||||
|
1. An HTTP `POST` to `${SPARQL_HOST}/sparql` succeeds, with headers:
|
||||||
|
- `Content-Type: application/x-www-form-urlencoded`
|
||||||
|
- `Accept: application/sparql-results+json`
|
||||||
|
- `Authorization: Basic ...`
|
||||||
|
2. The body parses as SPARQL JSON results (`application/sparql-results+json`)
|
||||||
|
|
||||||
|
It does **not** strictly mean:
|
||||||
|
- Your dataset is already loaded
|
||||||
|
- The loaded data is fully indexed (that can matter in some systems after `LOAD`)
|
||||||
|
|
||||||
|
This repo uses readiness as a **“SPARQL endpoint is alive and speaking the protocol”** check.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4) The actual Julia implementation (as in `julia/main.jl`)
|
||||||
|
|
||||||
|
### 4.1 Configuration (endpoint + auth)
|
||||||
|
|
||||||
|
The Julia script builds endpoint and auth from environment variables:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
const SPARQL_HOST = get(ENV, "SPARQL_HOST", "http://localhost:8080")
|
||||||
|
const SPARQL_ENDPOINT = "$SPARQL_HOST/sparql"
|
||||||
|
const SPARQL_USER = get(ENV, "SPARQL_USER", "admin")
|
||||||
|
const SPARQL_PASS = get(ENV, "SPARQL_PASS", "Passw0rd1")
|
||||||
|
const AUTH_HEADER = "Basic " * base64encode("$SPARQL_USER:$SPARQL_PASS")
|
||||||
|
```
|
||||||
|
|
||||||
|
In Docker Compose for this repo, the Julia container overrides `SPARQL_HOST` to use the service DNS name:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
environment:
|
||||||
|
- SPARQL_HOST=http://anzograph:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 The smoke query used for readiness
|
||||||
|
|
||||||
|
This is the query used in the wait loop:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
const SMOKE_TEST_QUERY = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 3"
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- It’s intentionally small (`LIMIT 3`) to keep the readiness check cheap.
|
||||||
|
- It returns *some* bindings when data exists, but **even an empty dataset can still return a valid empty result set**. The code treats “valid response” as ready.
|
||||||
|
|
||||||
|
If you want a readiness check that does not depend on any data being present, an `ASK` query is also common:
|
||||||
|
|
||||||
|
```sparql
|
||||||
|
ASK WHERE { ?s ?p ?o }
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 SPARQL query function (request + minimal retry)
|
||||||
|
|
||||||
|
`sparql_query(query; retries=...)` is a generic helper that makes SPARQL POST requests:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
function sparql_query(query::String; retries::Int=5)::SparqlResult
|
||||||
|
for attempt in 1:retries
|
||||||
|
try
|
||||||
|
response = HTTP.post(
|
||||||
|
SPARQL_ENDPOINT,
|
||||||
|
[
|
||||||
|
"Content-Type" => "application/x-www-form-urlencoded",
|
||||||
|
"Accept" => "application/sparql-results+json",
|
||||||
|
"Authorization" => AUTH_HEADER
|
||||||
|
];
|
||||||
|
body = "query=" * HTTP.URIs.escapeuri(query)
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status == 200
|
||||||
|
json = JSON.parse(String(response.body))
|
||||||
|
return SparqlResult(json["results"]["bindings"])
|
||||||
|
elseif response.status >= 500 && attempt < retries
|
||||||
|
sleep(10)
|
||||||
|
continue
|
||||||
|
else
|
||||||
|
error("SPARQL query failed with status $(response.status)")
|
||||||
|
end
|
||||||
|
catch e
|
||||||
|
if attempt < retries
|
||||||
|
sleep(10)
|
||||||
|
continue
|
||||||
|
end
|
||||||
|
rethrow(e)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
error("SPARQL query failed after $retries attempts")
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
Important behaviors to preserve when transferring:
|
||||||
|
- It uses **POST** (not GET) to the SPARQL endpoint.
|
||||||
|
- It requires a **200** response and successfully parses SPARQL JSON results.
|
||||||
|
- It retries on:
|
||||||
|
- `>= 500` server errors
|
||||||
|
- network / protocol / parsing errors (caught exceptions)
|
||||||
|
|
||||||
|
### 4.4 The readiness gate: `wait_for_anzograph`
|
||||||
|
|
||||||
|
This is the “await until ready” logic:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
function wait_for_anzograph(max_retries::Int=30)::Bool
|
||||||
|
println("Waiting for AnzoGraph at $SPARQL_ENDPOINT...")
|
||||||
|
|
||||||
|
for attempt in 1:max_retries
|
||||||
|
try
|
||||||
|
smoke_result = sparql_query(SMOKE_TEST_QUERY; retries=1)
|
||||||
|
println(" AnzoGraph is ready (attempt $attempt, smoke rows=$(length(smoke_result.bindings)))")
|
||||||
|
return true
|
||||||
|
catch e
|
||||||
|
println(" Attempt $attempt/$max_retries: $(typeof(e))")
|
||||||
|
sleep(4)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
error("AnzoGraph not available after $max_retries attempts")
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
Why it calls `sparql_query(...; retries=1)`:
|
||||||
|
- It makes each outer “readiness attempt” a **single** request.
|
||||||
|
- The outer loop controls cadence (`sleep(4)`) and total wait time.
|
||||||
|
- This avoids “nested retry loops” (inner sleeps + outer sleeps) that can make waits much longer than intended.
|
||||||
|
|
||||||
|
Time bound in the current implementation:
|
||||||
|
- `max_retries = 30`
|
||||||
|
- `sleep(4)` between attempts
|
||||||
|
- Roughly ~120 seconds of waiting (plus request time).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5) What failures cause it to keep waiting
|
||||||
|
|
||||||
|
`wait_for_anzograph()` catches any exception thrown by `sparql_query()` and retries. In practice, that includes:
|
||||||
|
|
||||||
|
- **Connection errors** (DNS not ready, connection refused, etc.)
|
||||||
|
- **Timeouts** (if HTTP request takes too long and the library throws)
|
||||||
|
- **Non-200 HTTP statuses** that cause `error(...)`
|
||||||
|
- **Non-JSON / unexpected JSON** responses causing `JSON.parse(...)` to throw
|
||||||
|
|
||||||
|
That last point is a big reason a “real SPARQL request + parse” is stronger than just “ping the port”.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6) Transferable, self-contained version (recommended pattern)
|
||||||
|
|
||||||
|
If you want to reuse this in another project, it’s usually easier to:
|
||||||
|
- avoid globals,
|
||||||
|
- make endpoint/auth explicit,
|
||||||
|
- use a **time-based timeout** instead of `max_retries` (more robust),
|
||||||
|
- add request timeouts so the wait loop can’t hang forever on a single request.
|
||||||
|
|
||||||
|
Below is a drop-in module you can copy into your project.
|
||||||
|
|
||||||
|
```julia
|
||||||
|
module AnzoGraphReady
|
||||||
|
|
||||||
|
using HTTP
|
||||||
|
using JSON
|
||||||
|
using Base64
|
||||||
|
using Dates
|
||||||
|
|
||||||
|
struct SparqlResult
|
||||||
|
bindings::Vector{Dict{String, Any}}
|
||||||
|
end
|
||||||
|
|
||||||
|
function basic_auth_header(user::AbstractString, pass::AbstractString)::String
|
||||||
|
return "Basic " * base64encode("$(user):$(pass)")
|
||||||
|
end
|
||||||
|
|
||||||
|
function sparql_query(
|
||||||
|
endpoint::AbstractString,
|
||||||
|
auth_header::AbstractString,
|
||||||
|
query::AbstractString;
|
||||||
|
retries::Int = 1,
|
||||||
|
retry_sleep_s::Real = 2,
|
||||||
|
request_timeout_s::Real = 15,
|
||||||
|
)::SparqlResult
|
||||||
|
for attempt in 1:retries
|
||||||
|
try
|
||||||
|
response = HTTP.post(
|
||||||
|
String(endpoint),
|
||||||
|
[
|
||||||
|
"Content-Type" => "application/x-www-form-urlencoded",
|
||||||
|
"Accept" => "application/sparql-results+json",
|
||||||
|
"Authorization" => auth_header,
|
||||||
|
];
|
||||||
|
body = "query=" * HTTP.URIs.escapeuri(String(query)),
|
||||||
|
readtimeout = request_timeout_s,
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status != 200
|
||||||
|
error("SPARQL query failed with status $(response.status)")
|
||||||
|
end
|
||||||
|
|
||||||
|
parsed = JSON.parse(String(response.body))
|
||||||
|
bindings = get(get(parsed, "results", Dict()), "bindings", Any[])
|
||||||
|
return SparqlResult(Vector{Dict{String, Any}}(bindings))
|
||||||
|
catch e
|
||||||
|
if attempt < retries
|
||||||
|
sleep(retry_sleep_s)
|
||||||
|
continue
|
||||||
|
end
|
||||||
|
rethrow(e)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
error("sparql_query: unreachable")
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
Wait until AnzoGraph responds to a real SPARQL POST with parseable JSON.
|
||||||
|
|
||||||
|
This is the direct analog of this repo's `wait_for_anzograph()`, but with:
|
||||||
|
- a time-based timeout (`timeout`)
|
||||||
|
- a request timeout per attempt (`request_timeout_s`)
|
||||||
|
- simple exponential backoff
|
||||||
|
"""
|
||||||
|
function wait_for_anzograph(
|
||||||
|
endpoint::AbstractString,
|
||||||
|
auth_header::AbstractString;
|
||||||
|
timeout::Period = Minute(3),
|
||||||
|
initial_delay_s::Real = 0.5,
|
||||||
|
max_delay_s::Real = 5.0,
|
||||||
|
request_timeout_s::Real = 10.0,
|
||||||
|
query::AbstractString = "ASK WHERE { ?s ?p ?o }",
|
||||||
|
)::Nothing
|
||||||
|
deadline = now() + timeout
|
||||||
|
delay_s = initial_delay_s
|
||||||
|
|
||||||
|
while now() < deadline
|
||||||
|
try
|
||||||
|
# A single attempt: if it succeeds, we declare "ready".
|
||||||
|
sparql_query(
|
||||||
|
endpoint,
|
||||||
|
auth_header,
|
||||||
|
query;
|
||||||
|
retries = 1,
|
||||||
|
request_timeout_s = request_timeout_s,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
catch
|
||||||
|
sleep(delay_s)
|
||||||
|
delay_s = min(max_delay_s, delay_s * 1.5)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
error("AnzoGraph not available before timeout=$(timeout)")
|
||||||
|
end
|
||||||
|
|
||||||
|
end # module
|
||||||
|
```
|
||||||
|
|
||||||
|
Typical usage (matching this repo’s environment variables):
|
||||||
|
|
||||||
|
```julia
|
||||||
|
using .AnzoGraphReady
|
||||||
|
|
||||||
|
sparql_host = get(ENV, "SPARQL_HOST", "http://localhost:8080")
|
||||||
|
endpoint = "$(sparql_host)/sparql"
|
||||||
|
user = get(ENV, "SPARQL_USER", "admin")
|
||||||
|
pass = get(ENV, "SPARQL_PASS", "Passw0rd1")
|
||||||
|
|
||||||
|
auth = AnzoGraphReady.basic_auth_header(user, pass)
|
||||||
|
AnzoGraphReady.wait_for_anzograph(endpoint, auth; timeout=Minute(5))
|
||||||
|
|
||||||
|
# Now it is safe to LOAD / query.
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7) Optional: waiting for “data is ready” after `LOAD`
|
||||||
|
|
||||||
|
Some systems accept `LOAD` but need time before results show up reliably (indexing / transaction visibility).
|
||||||
|
If you run into that in your other project, add a second gate after `LOAD`, for example:
|
||||||
|
|
||||||
|
1) load, then
|
||||||
|
2) poll a query that must be true after load (e.g., “triple count > 0”, or a known IRI exists).
|
||||||
|
|
||||||
|
Example “post-load gate”:
|
||||||
|
|
||||||
|
```julia
|
||||||
|
post_load_query = """
|
||||||
|
SELECT (COUNT(*) AS ?n)
|
||||||
|
WHERE { ?s ?p ?o }
|
||||||
|
"""
|
||||||
|
|
||||||
|
res = AnzoGraphReady.sparql_query(endpoint, auth, post_load_query; retries=1)
|
||||||
|
# Parse `?n` out of bindings and require it to be > 0; retry until it is.
|
||||||
|
```
|
||||||
|
|
||||||
|
(This repo does not currently enforce “non-empty”; it only enforces “SPARQL is working”.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8) Practical checklist when transferring to another project
|
||||||
|
|
||||||
|
- Make readiness checks hit the **real SPARQL POST** path you will use in production.
|
||||||
|
- Require a **valid JSON parse**, not just “port open”.
|
||||||
|
- Add **per-request timeouts**, so a single hung request cannot hang the whole pipeline.
|
||||||
|
- Prefer **time-based overall timeout** for predictable behavior in CI.
|
||||||
|
- Keep the query **cheap** (`ASK` or `LIMIT 1/3`).
|
||||||
|
- If you use Docker Compose healthchecks, consider also using `depends_on: condition: service_healthy`, but still keep the in-app wait as a safety net (it’s closer to the real contract your code needs).
|
||||||
|
|
||||||
Reference in New Issue
Block a user