Reorganiza backend
This commit is contained in:
46
backend/app/pipelines/graph_snapshot.py
Normal file
46
backend/app/pipelines/graph_snapshot.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from ..graph_export import edge_retrieval_query, graph_from_sparql_bindings
|
||||
from ..models import GraphResponse
|
||||
from ..sparql_engine import SparqlEngine
|
||||
from ..settings import Settings
|
||||
from .layout_spiral import spiral_positions
|
||||
|
||||
|
||||
async def fetch_graph_snapshot(
    sparql: SparqlEngine,
    *,
    settings: Settings,
    node_limit: int,
    edge_limit: int,
) -> GraphResponse:
    """
    Fetch a graph snapshot (nodes + edges) via SPARQL, independent of whether the
    underlying engine is RDFLib or AnzoGraph.

    Args:
        sparql: Engine used to run the edge-retrieval query.
        settings: App settings; supplies ``include_bnodes``, the backend name,
            and the TTL path / SPARQL endpoint reported in the response meta.
        node_limit: Maximum number of nodes to keep.
        edge_limit: Maximum number of edges to request.

    Returns:
        A ``GraphResponse`` whose nodes carry precomputed ``x``/``y`` positions.
    """
    query = edge_retrieval_query(
        edge_limit=edge_limit,
        include_bnodes=settings.include_bnodes,
    )
    raw = await sparql.query_json(query)

    # Defensively unwrap the standard SPARQL JSON envelope; missing keys
    # degrade to an empty binding list rather than raising.
    results = raw.get("results") or {}
    bindings = results.get("bindings") or []

    nodes, edges = graph_from_sparql_bindings(
        bindings,
        node_limit=node_limit,
        include_bnodes=settings.include_bnodes,
    )

    # Precompute a deterministic layout server-side so the frontend can render
    # immediately without running its own layout pass.
    xs, ys = spiral_positions(len(nodes))
    for node, x, y in zip(nodes, xs, ys):
        node["x"] = float(x)
        node["y"] = float(y)

    uses_rdflib = settings.graph_backend == "rdflib"
    uses_anzograph = settings.graph_backend == "anzograph"
    meta = GraphResponse.Meta(
        backend=sparql.name,
        ttl_path=settings.ttl_path if uses_rdflib else None,
        sparql_endpoint=settings.effective_sparql_endpoint() if uses_anzograph else None,
        include_bnodes=settings.include_bnodes,
        node_limit=node_limit,
        edge_limit=edge_limit,
        nodes=len(nodes),
        edges=len(edges),
    )
    return GraphResponse(nodes=nodes, edges=edges, meta=meta)
|
||||
30
backend/app/pipelines/layout_spiral.py
Normal file
30
backend/app/pipelines/layout_spiral.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
|
||||
def spiral_positions(n: int, *, max_r: float = 5000.0) -> tuple[list[float], list[float]]:
    """
    Deterministic "sunflower" (golden-angle) spiral layout.

    Point ``i`` is placed at angle ``i * golden_angle`` and radius
    ``sqrt(i / (n - 1)) * max_r``:

    - the golden-angle increment avoids radial spokes
    - the sqrt radius growth keeps density roughly uniform over the disc

    Args:
        n: Number of points to place; non-positive yields empty lists.
        max_r: Radius of the outermost point.

    Returns:
        Parallel lists of x and y coordinates, each of length ``max(n, 0)``.
    """
    if n <= 0:
        return ([], [])

    golden_angle = math.pi * (3.0 - math.sqrt(5.0))
    # Guard against division by zero when n == 1 (single point at the origin).
    scale = float(max(1, n - 1))

    xs: list[float] = []
    ys: list[float] = []
    for i in range(n):
        angle = golden_angle * i
        radius = max_r * math.sqrt(i / scale)
        xs.append(radius * math.cos(angle))
        ys.append(radius * math.sin(angle))

    return xs, ys
|
||||
|
||||
63
backend/app/pipelines/snapshot_service.py
Normal file
63
backend/app/pipelines/snapshot_service.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..models import GraphResponse
|
||||
from ..sparql_engine import SparqlEngine
|
||||
from ..settings import Settings
|
||||
from .graph_snapshot import fetch_graph_snapshot
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SnapshotKey:
    """Immutable cache key identifying one graph-snapshot variant.

    Frozen (hence hashable) so instances can index the snapshot and lock
    dicts in GraphSnapshotService. Equality covers every input that can
    change the snapshot's content.
    """

    # Maximum number of nodes requested for the snapshot.
    node_limit: int
    # Maximum number of edges requested for the snapshot.
    edge_limit: int
    # Whether blank nodes are included (taken from Settings.include_bnodes).
    include_bnodes: bool
|
||||
|
||||
|
||||
class GraphSnapshotService:
    """
    Caches graph snapshots so the backend doesn't re-run expensive SPARQL
    for stats/graph requests.

    Each distinct (node_limit, edge_limit, include_bnodes) combination is
    computed at most once; concurrent callers for the same key await a
    per-key lock instead of issuing duplicate queries.
    """

    def __init__(self, *, sparql: SparqlEngine, settings: Settings):
        self._sparql = sparql
        self._settings = settings

        # Completed snapshots, keyed by the parameters that shaped them.
        self._cache: dict[SnapshotKey, GraphResponse] = {}
        # One lock per key so unrelated snapshots never serialize each other.
        self._locks: dict[SnapshotKey, asyncio.Lock] = {}
        # Protects lazy creation of entries in ``_locks``.
        self._global_lock = asyncio.Lock()

    async def get(self, *, node_limit: int, edge_limit: int) -> GraphResponse:
        """Return the snapshot for these limits, computing it at most once per key."""
        key = SnapshotKey(
            node_limit=node_limit,
            edge_limit=edge_limit,
            include_bnodes=self._settings.include_bnodes,
        )

        # Fast path: snapshot already computed.
        hit = self._cache.get(key)
        if hit is not None:
            return hit

        # Create/fetch the per-key lock under the global lock so two
        # coroutines never race to install separate locks for one key.
        async with self._global_lock:
            per_key_lock = self._locks.setdefault(key, asyncio.Lock())

        async with per_key_lock:
            # Re-check: another coroutine may have filled the cache while
            # we were waiting on the per-key lock.
            hit = self._cache.get(key)
            if hit is None:
                hit = await fetch_graph_snapshot(
                    self._sparql,
                    settings=self._settings,
                    node_limit=node_limit,
                    edge_limit=edge_limit,
                )
                self._cache[key] = hit
            return hit
|
||||
|
||||
Reference in New Issue
Block a user