midpoint - go
This commit is contained in:
14
python_services/owl_imports_combiner/Dockerfile
Normal file
14
python_services/owl_imports_combiner/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt
|
||||
|
||||
COPY owl_imports_combiner.py /app/owl_imports_combiner.py
|
||||
COPY main.py /app/main.py
|
||||
|
||||
CMD ["python", "/app/main.py"]
|
||||
54
python_services/owl_imports_combiner/main.py
Normal file
54
python_services/owl_imports_combiner/main.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from owl_imports_combiner import (
|
||||
build_combined_graph,
|
||||
output_location_to_path,
|
||||
resolve_output_location,
|
||||
serialize_graph_to_ttl,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _env_bool(name: str, *, default: bool = False) -> bool:
|
||||
val = os.getenv(name)
|
||||
if val is None:
|
||||
return default
|
||||
return val.strip().lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def main() -> None:
|
||||
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
|
||||
|
||||
if not _env_bool("COMBINE_OWL_IMPORTS_ON_START", default=False):
|
||||
logger.info("Skipping combine step (COMBINE_OWL_IMPORTS_ON_START=false)")
|
||||
return
|
||||
|
||||
entry_location = os.getenv("COMBINE_ENTRY_LOCATION") or os.getenv("TTL_PATH")
|
||||
if not entry_location:
|
||||
raise SystemExit("Set COMBINE_ENTRY_LOCATION (or TTL_PATH) to the ontology file/URL to load.")
|
||||
|
||||
output_name = os.getenv("COMBINE_OUTPUT_NAME", "combined_ontology.ttl")
|
||||
output_location = resolve_output_location(
|
||||
entry_location,
|
||||
output_location=os.getenv("COMBINE_OUTPUT_LOCATION"),
|
||||
output_name=output_name,
|
||||
)
|
||||
|
||||
output_path = output_location_to_path(output_location)
|
||||
force = _env_bool("COMBINE_FORCE", default=False)
|
||||
if output_path.exists() and not force:
|
||||
logger.info("Skipping combine step (output exists): %s", output_location)
|
||||
return
|
||||
|
||||
graph = build_combined_graph(entry_location)
|
||||
logger.info("Finished combining imports; serializing to: %s", output_location)
|
||||
serialize_graph_to_ttl(graph, output_location)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
96
python_services/owl_imports_combiner/owl_imports_combiner.py
Normal file
96
python_services/owl_imports_combiner/owl_imports_combiner.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from rdflib import Graph
|
||||
from rdflib.namespace import OWL
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_http_url(location: str) -> bool:
|
||||
scheme = urlparse(location).scheme.lower()
|
||||
return scheme in {"http", "https"}
|
||||
|
||||
|
||||
def _is_file_uri(location: str) -> bool:
|
||||
return urlparse(location).scheme.lower() == "file"
|
||||
|
||||
|
||||
def _file_uri_to_path(location: str) -> Path:
|
||||
u = urlparse(location)
|
||||
if u.scheme.lower() != "file":
|
||||
raise ValueError(f"Not a file:// URI: {location!r}")
|
||||
return Path(unquote(u.path))
|
||||
|
||||
|
||||
def resolve_output_location(
|
||||
entry_location: str,
|
||||
*,
|
||||
output_location: str | None,
|
||||
output_name: str,
|
||||
) -> str:
|
||||
if output_location:
|
||||
return output_location
|
||||
|
||||
if _is_http_url(entry_location):
|
||||
raise ValueError(
|
||||
"COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
|
||||
)
|
||||
|
||||
entry_path = _file_uri_to_path(entry_location) if _is_file_uri(entry_location) else Path(entry_location)
|
||||
return str(entry_path.parent / output_name)
|
||||
|
||||
|
||||
def _output_destination_to_path(output_location: str) -> Path:
|
||||
if _is_file_uri(output_location):
|
||||
return _file_uri_to_path(output_location)
|
||||
if _is_http_url(output_location):
|
||||
raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
|
||||
return Path(output_location)
|
||||
|
||||
|
||||
def output_location_to_path(output_location: str) -> Path:
|
||||
return _output_destination_to_path(output_location)
|
||||
|
||||
|
||||
def build_combined_graph(entry_location: str) -> Graph:
|
||||
"""
|
||||
Recursively loads an RDF document (file path, file:// URI, or http(s) URL) and its
|
||||
owl:imports into a single in-memory graph.
|
||||
"""
|
||||
combined_graph = Graph()
|
||||
visited_locations: set[str] = set()
|
||||
|
||||
def resolve_imports(location: str) -> None:
|
||||
if location in visited_locations:
|
||||
return
|
||||
visited_locations.add(location)
|
||||
|
||||
logger.info("Loading ontology: %s", location)
|
||||
try:
|
||||
combined_graph.parse(location=location)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to load %s (%s)", location, e)
|
||||
return
|
||||
|
||||
imports = [str(o) for _, _, o in combined_graph.triples((None, OWL.imports, None))]
|
||||
for imported_location in imports:
|
||||
if imported_location not in visited_locations:
|
||||
resolve_imports(imported_location)
|
||||
|
||||
resolve_imports(entry_location)
|
||||
return combined_graph
|
||||
|
||||
|
||||
def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
|
||||
output_path = _output_destination_to_path(output_location)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
|
||||
graph.serialize(destination=str(tmp_path), format="turtle")
|
||||
os.replace(str(tmp_path), str(output_path))
|
||||
1
python_services/owl_imports_combiner/requirements.txt
Normal file
1
python_services/owl_imports_combiner/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
rdflib
|
||||
Reference in New Issue
Block a user