diff --git a/backend/app/README.md b/backend/app/README.md index 3f4afec..0d44d7e 100644 --- a/backend/app/README.md +++ b/backend/app/README.md @@ -2,12 +2,8 @@ This folder contains the FastAPI backend for `visualizador_instanciados`. -The backend can execute SPARQL queries in two interchangeable ways: - -1. **`GRAPH_BACKEND=rdflib`**: parse a Turtle file into an in-memory RDFLib `Graph` and run SPARQL queries locally. -2. **`GRAPH_BACKEND=anzograph`**: run SPARQL queries against an AnzoGraph SPARQL endpoint over HTTP (optionally `LOAD` a TTL on startup). - -Callers (frontend or other clients) interact with a single API surface (`/api/*`) and do not need to know which backend is configured. +The backend executes SPARQL queries against an AnzoGraph SPARQL endpoint over HTTP +(optionally `LOAD` a TTL on startup). ## Files @@ -16,10 +12,9 @@ Callers (frontend or other clients) interact with a single API surface (`/api/*` - `settings.py` - Env-driven configuration (`pydantic-settings`). - `sparql_engine.py` - - Backend-agnostic SPARQL execution layer: - - `RdflibEngine`: `Graph.query(...)` + SPARQL JSON serialization. + - SPARQL execution layer: - `AnzoGraphEngine`: HTTP POST to `/sparql` with Basic auth + readiness gate. - - `create_sparql_engine(settings)` chooses the engine based on `GRAPH_BACKEND`. + - `create_sparql_engine(settings)` creates the engine. - `graph_export.py` - Shared helpers to: - build the snapshot SPARQL query used for edge retrieval @@ -27,11 +22,8 @@ Callers (frontend or other clients) interact with a single API surface (`/api/*` - `models.py` - Pydantic response/request models: - `Node`, `Edge`, `GraphResponse`, `StatsResponse`, etc. -- `rdf_store.py` - - A local parsed representation (dense IDs + neighbor-ish data) built only in `GRAPH_BACKEND=rdflib`. - - Used by `/api/nodes`, `/api/edges`, and `rdflib`-mode `/api/stats`. - `pipelines/graph_snapshot.py` - - Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL (works for both RDFLib and AnzoGraph). + - Pipeline used by `/api/graph` to return a `{nodes, edges}` snapshot via SPARQL. - `pipelines/layout_dag_radial.py` - DAG layout helpers used by `pipelines/graph_snapshot.py`: - cycle detection @@ -48,11 +40,10 @@ On startup (FastAPI lifespan): 1. `create_sparql_engine(settings)` selects and starts a SPARQL engine. 2. The engine is stored at `app.state.sparql`. -3. If `GRAPH_BACKEND=rdflib`, `RDFStore` is also built from the already-loaded RDFLib graph and stored at `app.state.store`. On shutdown: -- `app.state.sparql.shutdown()` is called to close the HTTP client (AnzoGraph mode) or no-op (RDFLib mode). +- `app.state.sparql.shutdown()` is called to close the HTTP client. ## Environment Variables @@ -60,20 +51,16 @@ Most configuration is intended to be provided via container environment variable Core: -- `GRAPH_BACKEND`: `rdflib` or `anzograph` - `INCLUDE_BNODES`: `true`/`false` - `CORS_ORIGINS`: comma-separated list or `*` -RDFLib mode: +Optional import-combining step (separate container): -- `TTL_PATH`: path inside the backend container to a `.ttl` file (example: `/data/o3po.ttl`) -- `MAX_TRIPLES`: optional int; if set, stops parsing after this many triples +The repo's `owl_imports_combiner` Docker service can be used to recursively load a Turtle file (or URL) plus its `owl:imports` into a single combined TTL output. -Optional import-combining step (runs before the SPARQL engine starts): - -- `COMBINE_OWL_IMPORTS_ON_START`: `true` to recursively load `TTL_PATH` (or `COMBINE_ENTRY_LOCATION`) plus `owl:imports` and write a combined TTL file. -- `COMBINE_ENTRY_LOCATION`: optional override for the entry file/URL to load (defaults to `TTL_PATH`) -- `COMBINE_OUTPUT_LOCATION`: optional explicit output path (defaults to `${dirname(entry)}/${COMBINE_OUTPUT_NAME}`) +- `COMBINE_OWL_IMPORTS_ON_START`: `true` to run the combiner container on startup (no-op when `false`) +- `COMBINE_ENTRY_LOCATION`: entry file/URL to load (falls back to `TTL_PATH` if not set) +- `COMBINE_OUTPUT_LOCATION`: output path for the combined TTL (defaults to `${dirname(entry)}/${COMBINE_OUTPUT_NAME}`) - `COMBINE_OUTPUT_NAME`: output filename when `COMBINE_OUTPUT_LOCATION` is not set (default: `combined_ontology.ttl`) - `COMBINE_FORCE`: `true` to rebuild even if the output file already exists @@ -119,8 +106,6 @@ This matches the behavior described in `docs/anzograph-readiness-julia.md`. - `GET /api/graph?node_limit=...&edge_limit=...` - Returns a graph snapshot as `{ nodes: [...], edges: [...] }`. - Implemented as a SPARQL edge query + mapping in `pipelines/graph_snapshot.py`. -- `GET /api/nodes`, `GET /api/edges` - - Only available in `GRAPH_BACKEND=rdflib` (these use `RDFStore`'s dense ID tables). ## Data Contract @@ -193,5 +178,4 @@ If a cycle is detected in the returned `rdfs:subClassOf` snapshot, `/api/graph` ## Notes / Tradeoffs - `/api/graph` returns only nodes that appear in the returned edge result set. Nodes not referenced by those edges will not be present. -- RDFLib and AnzoGraph may differ in supported SPARQL features (vendor extensions, inference, performance), but the API surface is the same. -- `rdf_store.py` is currently only needed for `/api/nodes`, `/api/edges`, and rdflib-mode `/api/stats`. If you don't use those endpoints, it can be removed later. +- AnzoGraph SPARQL feature support (inference, extensions, performance) is vendor-specific. diff --git a/backend/app/main.py b/backend/app/main.py index d8f67c8..d501463 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,81 +1,34 @@ from __future__ import annotations from contextlib import asynccontextmanager -import logging -import asyncio from fastapi import FastAPI, HTTPException, Query from fastapi.middleware.cors import CORSMiddleware from .models import ( - EdgesResponse, GraphResponse, NeighborsRequest, NeighborsResponse, - NodesResponse, SparqlQueryRequest, StatsResponse, ) from .pipelines.layout_dag_radial import CycleError -from .pipelines.owl_imports_combiner import ( - build_combined_graph, - output_location_to_path, - resolve_output_location, - serialize_graph_to_ttl, -) from .pipelines.selection_neighbors import fetch_neighbor_ids_for_selection from .pipelines.snapshot_service import GraphSnapshotService -from .rdf_store import RDFStore -from .sparql_engine import RdflibEngine, SparqlEngine, create_sparql_engine +from .sparql_engine import SparqlEngine, create_sparql_engine from .settings import Settings settings = Settings() -logger = logging.getLogger(__name__) @asynccontextmanager async def lifespan(app: FastAPI): - rdflib_preloaded_graph = None - - if settings.combine_owl_imports_on_start: - entry_location = settings.combine_entry_location or settings.ttl_path - output_location = resolve_output_location( - entry_location, - output_location=settings.combine_output_location, - output_name=settings.combine_output_name, - ) - - output_path = output_location_to_path(output_location) - if output_path.exists() and not settings.combine_force: - logger.info("Skipping combine step (output exists): %s", output_location) - else: - rdflib_preloaded_graph = await asyncio.to_thread(build_combined_graph, entry_location) - logger.info("Finished combining imports; serializing to: %s", output_location) - await asyncio.to_thread(serialize_graph_to_ttl, rdflib_preloaded_graph, output_location) - - if settings.graph_backend == "rdflib": - settings.ttl_path = str(output_path) - - sparql: SparqlEngine = create_sparql_engine(settings, rdflib_graph=rdflib_preloaded_graph) + sparql: SparqlEngine = create_sparql_engine(settings) await sparql.startup() app.state.sparql = sparql app.state.snapshot_service = GraphSnapshotService(sparql=sparql, settings=settings) - # Only build node/edge tables when running in rdflib mode. - if settings.graph_backend == "rdflib": - assert isinstance(sparql, RdflibEngine) - if sparql.graph is None: - raise RuntimeError("rdflib graph failed to load") - - store = RDFStore( - ttl_path=settings.ttl_path, - include_bnodes=settings.include_bnodes, - max_triples=settings.max_triples, - ) - store.load(sparql.graph) - app.state.store = store - yield await sparql.shutdown() @@ -109,7 +62,7 @@ async def stats() -> StatsResponse: meta = snap.meta return StatsResponse( backend=meta.backend if meta else app.state.sparql.name, - ttl_path=meta.ttl_path if meta and meta.ttl_path else settings.ttl_path, + ttl_path=meta.ttl_path if meta else None, sparql_endpoint=meta.sparql_endpoint if meta else None, parsed_triples=len(snap.edges), nodes=len(snap.nodes), @@ -138,28 +91,6 @@ async def neighbors(req: NeighborsRequest) -> NeighborsResponse: return NeighborsResponse(selected_ids=req.selected_ids, neighbor_ids=neighbor_ids) -@app.get("/api/nodes", response_model=NodesResponse) -def nodes( - limit: int = Query(default=10_000, ge=1, le=200_000), - offset: int = Query(default=0, ge=0), -) -> NodesResponse: - if settings.graph_backend != "rdflib": - raise HTTPException(status_code=501, detail="GET /api/nodes is only supported in GRAPH_BACKEND=rdflib mode") - store: RDFStore = app.state.store - return NodesResponse(total=store.node_count, nodes=store.node_slice(offset=offset, limit=limit)) - - -@app.get("/api/edges", response_model=EdgesResponse) -def edges( - limit: int = Query(default=50_000, ge=1, le=500_000), - offset: int = Query(default=0, ge=0), -) -> EdgesResponse: - if settings.graph_backend != "rdflib": - raise HTTPException(status_code=501, detail="GET /api/edges is only supported in GRAPH_BACKEND=rdflib mode") - store: RDFStore = app.state.store - return EdgesResponse(total=store.edge_count, edges=store.edge_slice(offset=offset, limit=limit)) - - @app.get("/api/graph", response_model=GraphResponse) async def graph( node_limit: int = Query(default=50_000, ge=1, le=200_000), diff --git a/backend/app/models.py b/backend/app/models.py index 8b662a7..a0f4e8a 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -8,7 +8,7 @@ class Node(BaseModel): termType: str # "uri" | "bnode" iri: str label: str | None = None - # Optional because /api/nodes (RDFStore) doesn't currently provide positions. + # Optional because some endpoints may omit positions. x: float | None = None y: float | None = None @@ -21,23 +21,13 @@ class Edge(BaseModel): class StatsResponse(BaseModel): backend: str - ttl_path: str + ttl_path: str | None = None sparql_endpoint: str | None = None parsed_triples: int nodes: int edges: int -class NodesResponse(BaseModel): - total: int - nodes: list[Node] - - -class EdgesResponse(BaseModel): - total: int - edges: list[Edge] - - class GraphResponse(BaseModel): class Meta(BaseModel): backend: str diff --git a/backend/app/pipelines/graph_snapshot.py b/backend/app/pipelines/graph_snapshot.py index f8211be..9d77c4f 100644 --- a/backend/app/pipelines/graph_snapshot.py +++ b/backend/app/pipelines/graph_snapshot.py @@ -69,8 +69,7 @@ async def fetch_graph_snapshot( edge_limit: int, ) -> GraphResponse: """ - Fetch a graph snapshot (nodes + edges) via SPARQL, independent of whether the - underlying engine is RDFLib or AnzoGraph. + Fetch a graph snapshot (nodes + edges) via SPARQL. """ edges_q = edge_retrieval_query(edge_limit=edge_limit, include_bnodes=settings.include_bnodes) res = await sparql.query_json(edges_q) @@ -137,8 +136,8 @@ async def fetch_graph_snapshot( meta = GraphResponse.Meta( backend=sparql.name, - ttl_path=settings.ttl_path if settings.graph_backend == "rdflib" else None, - sparql_endpoint=settings.effective_sparql_endpoint() if settings.graph_backend == "anzograph" else None, + ttl_path=None, + sparql_endpoint=settings.effective_sparql_endpoint(), include_bnodes=settings.include_bnodes, node_limit=node_limit, edge_limit=edge_limit, diff --git a/backend/app/rdf_store.py b/backend/app/rdf_store.py deleted file mode 100644 index 0e2fa16..0000000 --- a/backend/app/rdf_store.py +++ /dev/null @@ -1,150 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any - -from rdflib import BNode, Graph, Literal, URIRef -from rdflib.namespace import RDFS, SKOS - - -LABEL_PREDICATES = {RDFS.label, SKOS.prefLabel, SKOS.altLabel} - - -@dataclass(frozen=True) -class EdgeRow: - source: int - target: int - predicate: str - - -class RDFStore: - def __init__(self, *, ttl_path: str, include_bnodes: bool, max_triples: int | None): - self.ttl_path = ttl_path - self.include_bnodes = include_bnodes - self.max_triples = max_triples - - self.graph: Graph | None = None - - self._id_by_term: dict[Any, int] = {} - self._term_by_id: list[Any] = [] - - self._labels_by_id: dict[int, str] = {} - self._edges: list[EdgeRow] = [] - self._parsed_triples = 0 - - def _term_allowed(self, term: Any) -> bool: - if isinstance(term, Literal): - return False - if isinstance(term, BNode) and not self.include_bnodes: - return False - return isinstance(term, (URIRef, BNode)) - - def _get_id(self, term: Any) -> int | None: - if not self._term_allowed(term): - return None - existing = self._id_by_term.get(term) - if existing is not None: - return existing - nid = len(self._term_by_id) - self._id_by_term[term] = nid - self._term_by_id.append(term) - return nid - - def _term_type(self, term: Any) -> str: - if isinstance(term, BNode): - return "bnode" - return "uri" - - def _term_iri(self, term: Any) -> str: - if isinstance(term, BNode): - return f"_:{term}" - return str(term) - - def load(self, graph: Graph | None = None) -> None: - g = graph or Graph() - if graph is None: - g.parse(self.ttl_path, format="turtle") - self.graph = g - - self._id_by_term.clear() - self._term_by_id.clear() - self._labels_by_id.clear() - self._edges.clear() - - parsed = 0 - for (s, p, o) in g: - parsed += 1 - if self.max_triples is not None and parsed > self.max_triples: - break - - # Capture labels but do not emit them as edges. - if p in LABEL_PREDICATES and isinstance(o, Literal): - sid = self._get_id(s) - if sid is not None and sid not in self._labels_by_id: - self._labels_by_id[sid] = str(o) - continue - - sid = self._get_id(s) - oid = self._get_id(o) - if sid is None or oid is None: - continue - - self._edges.append(EdgeRow(source=sid, target=oid, predicate=str(p))) - - self._parsed_triples = parsed - - @property - def parsed_triples(self) -> int: - return self._parsed_triples - - @property - def node_count(self) -> int: - return len(self._term_by_id) - - @property - def edge_count(self) -> int: - return len(self._edges) - - def node_slice(self, *, offset: int, limit: int) -> list[dict[str, Any]]: - end = min(self.node_count, offset + limit) - out: list[dict[str, Any]] = [] - for nid in range(offset, end): - term = self._term_by_id[nid] - out.append( - { - "id": nid, - "termType": self._term_type(term), - "iri": self._term_iri(term), - "label": self._labels_by_id.get(nid), - } - ) - return out - - def edge_slice(self, *, offset: int, limit: int) -> list[dict[str, Any]]: - end = min(self.edge_count, offset + limit) - out: list[dict[str, Any]] = [] - for row in self._edges[offset:end]: - out.append( - { - "source": row.source, - "target": row.target, - "predicate": row.predicate, - } - ) - return out - - def edges_within_nodes(self, *, max_node_id_exclusive: int, limit: int) -> list[dict[str, Any]]: - out: list[dict[str, Any]] = [] - for row in self._edges: - if row.source >= max_node_id_exclusive or row.target >= max_node_id_exclusive: - continue - out.append( - { - "source": row.source, - "target": row.target, - "predicate": row.predicate, - } - ) - if len(out) >= limit: - break - return out diff --git a/backend/app/settings.py b/backend/app/settings.py index 9e49b18..b431615 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -1,27 +1,11 @@ from __future__ import annotations -from typing import Literal - from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict class Settings(BaseSettings): - # Which graph engine executes SPARQL queries. - # - rdflib: parse TTL locally and query in-memory - # - anzograph: query a remote AnzoGraph SPARQL endpoint (optionally LOAD on startup) - graph_backend: Literal["rdflib", "anzograph"] = Field(default="rdflib", alias="GRAPH_BACKEND") - - ttl_path: str = Field(default="/data/o3po.ttl", alias="TTL_PATH") include_bnodes: bool = Field(default=False, alias="INCLUDE_BNODES") - max_triples: int | None = Field(default=None, alias="MAX_TRIPLES") - - # Optional: Combine owl:imports into a single TTL file on backend startup. - combine_owl_imports_on_start: bool = Field(default=False, alias="COMBINE_OWL_IMPORTS_ON_START") - combine_entry_location: str | None = Field(default=None, alias="COMBINE_ENTRY_LOCATION") - combine_output_location: str | None = Field(default=None, alias="COMBINE_OUTPUT_LOCATION") - combine_output_name: str = Field(default="combined_ontology.ttl", alias="COMBINE_OUTPUT_NAME") - combine_force: bool = Field(default=False, alias="COMBINE_FORCE") # AnzoGraph / SPARQL endpoint configuration sparql_host: str = Field(default="http://anzograph:8080", alias="SPARQL_HOST") diff --git a/backend/app/sparql_engine.py b/backend/app/sparql_engine.py index 75a3559..7d71fec 100644 --- a/backend/app/sparql_engine.py +++ b/backend/app/sparql_engine.py @@ -2,11 +2,9 @@ from __future__ import annotations import asyncio import base64 -import json from typing import Any, Protocol import httpx -from rdflib import Graph from .settings import Settings @@ -21,35 +19,6 @@ class SparqlEngine(Protocol): async def query_json(self, query: str) -> dict[str, Any]: ... -class RdflibEngine: - name = "rdflib" - - def __init__(self, *, ttl_path: str, graph: Graph | None = None): - self.ttl_path = ttl_path - self.graph: Graph | None = graph - - async def startup(self) -> None: - if self.graph is not None: - return - g = Graph() - g.parse(self.ttl_path, format="turtle") - self.graph = g - - async def shutdown(self) -> None: - # Nothing to close for in-memory rdflib graph. - return None - - async def query_json(self, query: str) -> dict[str, Any]: - if self.graph is None: - raise RuntimeError("RdflibEngine not started") - - result = self.graph.query(query) - payload = result.serialize(format="json") - if isinstance(payload, bytes): - payload = payload.decode("utf-8") - return json.loads(payload) - - class AnzoGraphEngine: name = "anzograph" @@ -169,9 +138,5 @@ class AnzoGraphEngine: raise RuntimeError(f"AnzoGraph not ready at {self.endpoint}") from last_err -def create_sparql_engine(settings: Settings, *, rdflib_graph: Graph | None = None) -> SparqlEngine: - if settings.graph_backend == "rdflib": - return RdflibEngine(ttl_path=settings.ttl_path, graph=rdflib_graph) - if settings.graph_backend == "anzograph": - return AnzoGraphEngine(settings=settings) - raise RuntimeError(f"Unsupported GRAPH_BACKEND={settings.graph_backend!r}") +def create_sparql_engine(settings: Settings) -> SparqlEngine: + return AnzoGraphEngine(settings=settings) diff --git a/backend/requirements.txt b/backend/requirements.txt index 987b0f5..f0c51f4 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,5 +1,4 @@ fastapi uvicorn[standard] -rdflib pydantic-settings httpx diff --git a/docker-compose.yml b/docker-compose.yml index 28af26b..2a3103f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,22 @@ services: + owl_imports_combiner: + build: ./python_services/owl_imports_combiner + environment: + - COMBINE_OWL_IMPORTS_ON_START=${COMBINE_OWL_IMPORTS_ON_START:-false} + - COMBINE_ENTRY_LOCATION + - COMBINE_OUTPUT_LOCATION + - COMBINE_OUTPUT_NAME + - COMBINE_FORCE=${COMBINE_FORCE:-false} + - TTL_PATH=${TTL_PATH:-/data/o3po.ttl} + volumes: + - ./data:/data:Z + backend: build: ./backend ports: - "8000:8000" environment: - - GRAPH_BACKEND=${GRAPH_BACKEND:-rdflib} - - TTL_PATH=${TTL_PATH:-/data/o3po.ttl} - INCLUDE_BNODES=${INCLUDE_BNODES:-false} - - MAX_TRIPLES - CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:5173} - SPARQL_HOST=${SPARQL_HOST:-http://anzograph:8080} - SPARQL_ENDPOINT @@ -21,14 +30,12 @@ services: - SPARQL_READY_RETRIES=${SPARQL_READY_RETRIES:-30} - SPARQL_READY_DELAY_S=${SPARQL_READY_DELAY_S:-4} - SPARQL_READY_TIMEOUT_S=${SPARQL_READY_TIMEOUT_S:-10} - - COMBINE_OWL_IMPORTS_ON_START=${COMBINE_OWL_IMPORTS_ON_START:-false} - - COMBINE_ENTRY_LOCATION - - COMBINE_OUTPUT_LOCATION - - COMBINE_OUTPUT_NAME - - COMBINE_FORCE=${COMBINE_FORCE:-false} volumes: - ./backend:/app - ./data:/data:Z + depends_on: + - owl_imports_combiner + - anzograph command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload healthcheck: test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health').read()"] diff --git a/python_services/owl_imports_combiner/Dockerfile b/python_services/owl_imports_combiner/Dockerfile new file mode 100644 index 0000000..96bec4c --- /dev/null +++ b/python_services/owl_imports_combiner/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.12-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY owl_imports_combiner.py /app/owl_imports_combiner.py +COPY main.py /app/main.py + +CMD ["python", "/app/main.py"] diff --git a/python_services/owl_imports_combiner/main.py b/python_services/owl_imports_combiner/main.py new file mode 100644 index 0000000..9f65fab --- /dev/null +++ b/python_services/owl_imports_combiner/main.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import logging +import os + +from owl_imports_combiner import ( + build_combined_graph, + output_location_to_path, + resolve_output_location, + serialize_graph_to_ttl, +) + + +logger = logging.getLogger(__name__) + + +def _env_bool(name: str, *, default: bool = False) -> bool: + val = os.getenv(name) + if val is None: + return default + return val.strip().lower() in {"1", "true", "yes", "y", "on"} + + +def main() -> None: + logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper()) + + if not _env_bool("COMBINE_OWL_IMPORTS_ON_START", default=False): + logger.info("Skipping combine step (COMBINE_OWL_IMPORTS_ON_START=false)") + return + + entry_location = os.getenv("COMBINE_ENTRY_LOCATION") or os.getenv("TTL_PATH") + if not entry_location: + raise SystemExit("Set COMBINE_ENTRY_LOCATION (or TTL_PATH) to the ontology file/URL to load.") + + output_name = os.getenv("COMBINE_OUTPUT_NAME", "combined_ontology.ttl") + output_location = resolve_output_location( + entry_location, + output_location=os.getenv("COMBINE_OUTPUT_LOCATION"), + output_name=output_name, + ) + + output_path = output_location_to_path(output_location) + force = _env_bool("COMBINE_FORCE", default=False) + if output_path.exists() and not force: + logger.info("Skipping combine step (output exists): %s", output_location) + return + + graph = build_combined_graph(entry_location) + logger.info("Finished combining imports; serializing to: %s", output_location) + serialize_graph_to_ttl(graph, output_location) + + +if __name__ == "__main__": + main() diff --git a/backend/app/pipelines/owl_imports_combiner.py b/python_services/owl_imports_combiner/owl_imports_combiner.py similarity index 100% rename from backend/app/pipelines/owl_imports_combiner.py rename to python_services/owl_imports_combiner/owl_imports_combiner.py diff --git a/python_services/owl_imports_combiner/requirements.txt b/python_services/owl_imports_combiner/requirements.txt new file mode 100644 index 0000000..f455515 --- /dev/null +++ b/python_services/owl_imports_combiner/requirements.txt @@ -0,0 +1 @@ +rdflib