97 lines
2.9 KiB
Python
97 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
from urllib.parse import unquote, urlparse
|
|
|
|
from rdflib import Graph
|
|
from rdflib.namespace import OWL
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _is_http_url(location: str) -> bool:
|
|
scheme = urlparse(location).scheme.lower()
|
|
return scheme in {"http", "https"}
|
|
|
|
|
|
def _is_file_uri(location: str) -> bool:
|
|
return urlparse(location).scheme.lower() == "file"
|
|
|
|
|
|
def _file_uri_to_path(location: str) -> Path:
|
|
u = urlparse(location)
|
|
if u.scheme.lower() != "file":
|
|
raise ValueError(f"Not a file:// URI: {location!r}")
|
|
return Path(unquote(u.path))
|
|
|
|
|
|
def resolve_output_location(
    entry_location: str,
    *,
    output_location: str | None,
    output_name: str,
) -> str:
    """Decide where the combined output should be written.

    Args:
        entry_location: Location of the entry document (plain path,
            ``file:`` URI, or http(s) URL).
        output_location: Explicit destination; returned unchanged when it
            is non-empty.
        output_name: File name to place next to the entry document when no
            explicit destination is given.

    Returns:
        ``output_location`` if truthy, otherwise ``output_name`` joined onto
        the entry document's parent directory, as a string.

    Raises:
        ValueError: If no explicit destination is given and the entry
            location is an http(s) URL, so there is no local directory to
            derive a default from.
    """
    # An explicit, non-empty destination always wins.
    if output_location:
        return output_location

    if _is_http_url(entry_location):
        raise ValueError(
            "COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
        )

    if _is_file_uri(entry_location):
        entry_path = _file_uri_to_path(entry_location)
    else:
        entry_path = Path(entry_location)

    # Default: a sibling of the entry document.
    sibling = entry_path.parent.joinpath(output_name)
    return str(sibling)
|
|
|
|
|
|
def _output_destination_to_path(output_location: str) -> Path:
    """Turn an output location string into a local ``Path``.

    Args:
        output_location: A plain filesystem path or a ``file:`` URI.

    Returns:
        The corresponding ``Path``.

    Raises:
        ValueError: If *output_location* is an http(s) URL.
    """
    # The http(s) and file schemes are mutually exclusive, so the order of
    # these two checks does not affect the outcome.
    if _is_http_url(output_location):
        raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
    if _is_file_uri(output_location):
        return _file_uri_to_path(output_location)
    return Path(output_location)
|
|
|
|
|
|
def output_location_to_path(output_location: str) -> Path:
    """Public entry point that delegates to ``_output_destination_to_path``.

    Accepts the same input and returns (or raises) exactly what that helper
    does.
    """
    resolved = _output_destination_to_path(output_location)
    return resolved
|
|
|
|
|
|
def build_combined_graph(entry_location: str) -> Graph:
    """Load an RDF document and, recursively, everything it ``owl:imports``.

    Args:
        entry_location: Location of the entry document, passed to
            ``Graph.parse(location=...)``.

    Returns:
        A single in-memory graph holding the triples of every document that
        loaded successfully. Documents that fail to load are logged as
        warnings and skipped.
    """
    merged = Graph()
    seen: set[str] = set()

    def _load(source: str) -> None:
        """Parse *source* into ``merged`` once, then follow its imports."""
        # Each location is attempted at most once; this also breaks import cycles.
        if source in seen:
            return
        seen.add(source)

        logger.info("Loading ontology: %s", source)
        try:
            merged.parse(location=source)
        except Exception as exc:
            # Best effort: a broken or unreachable import must not abort the
            # whole combine run.
            logger.warning("Failed to load %s (%s)", source, exc)
            return

        # Materialise the import targets into a list before recursing: the
        # recursive calls add triples to ``merged`` while we iterate.
        import_targets = [
            str(target) for _, _, target in merged.triples((None, OWL.imports, None))
        ]
        for target in import_targets:
            _load(target)

    _load(entry_location)
    return merged
|
|
|
|
|
|
def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
    """Serialize *graph* as Turtle to *output_location* via a temporary file.

    The graph is first written to ``<output>.tmp`` in the destination
    directory and then moved over the final path with ``os.replace``, so an
    existing output file is only replaced once serialization has finished.

    Args:
        graph: The graph to serialize.
        output_location: Plain filesystem path or ``file:`` URI of the
            destination. Missing parent directories are created.

    Raises:
        ValueError: If *output_location* is an http(s) URL (raised by
            ``_output_destination_to_path``).
        Exception: Any error from serialization or the final replace is
            re-raised after the temporary file has been cleaned up.
    """
    output_path = _output_destination_to_path(output_location)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Same directory as the target, so the final replace is a plain rename.
    tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
    try:
        graph.serialize(destination=str(tmp_path), format="turtle")
        os.replace(str(tmp_path), str(output_path))
    except BaseException:
        # Do not leave a stale or partial temp file next to the output.
        try:
            os.unlink(str(tmp_path))
        except OSError:
            # The temp file may never have been created, or was already
            # moved; the original error is the one worth reporting.
            pass
        raise
|