from __future__ import annotations

import logging
import os
from pathlib import Path
from urllib.parse import unquote, urlparse

from rdflib import Graph
from rdflib.namespace import OWL

logger = logging.getLogger(__name__)


def _is_http_url(location: str) -> bool:
    """Return True when *location* parses with an ``http`` or ``https`` scheme."""
    scheme = urlparse(location).scheme.lower()
    return scheme in {"http", "https"}


def _is_file_uri(location: str) -> bool:
    """Return True when *location* parses with a ``file`` scheme."""
    return urlparse(location).scheme.lower() == "file"


def _file_uri_to_path(location: str) -> Path:
    """Convert a ``file://`` URI into a ``Path``.

    Only the URI's path component is used (percent-decoded); any host part
    is ignored.

    Raises:
        ValueError: If *location* does not use the ``file`` scheme.
    """
    u = urlparse(location)
    if u.scheme.lower() != "file":
        raise ValueError(f"Not a file:// URI: {location!r}")
    # NOTE(review): for a drive-letter URI such as file:///C:/dir/x the path
    # component is "/C:/dir/x" (leading slash kept) -- confirm whether Windows
    # is a supported platform before relying on this.
    return Path(unquote(u.path))


def resolve_output_location(
    entry_location: str,
    *,
    output_location: str | None,
    output_name: str,
) -> str:
    """Decide where the combined ontology should be written.

    Args:
        entry_location: File path, ``file://`` URI, or http(s) URL of the
            entry document.
        output_location: Explicit destination. Returned unchanged when
            truthy; ``None`` or an empty string means "derive one".
        output_name: File name placed next to the entry document when no
            explicit destination is given.

    Returns:
        The explicit *output_location*, or ``<entry directory>/<output_name>``
        as a string.

    Raises:
        ValueError: If no explicit destination is given and *entry_location*
            is an http(s) URL, which has no local directory to write into.
    """
    if output_location:
        return output_location

    if _is_http_url(entry_location):
        raise ValueError(
            "COMBINE_ENTRY_LOCATION points to an http(s) URL; set COMBINE_OUTPUT_LOCATION to a writable file path."
        )

    entry_path = _file_uri_to_path(entry_location) if _is_file_uri(entry_location) else Path(entry_location)
    return str(entry_path.parent / output_name)


def _output_destination_to_path(output_location: str) -> Path:
    """Turn an output destination (plain path or ``file://`` URI) into a ``Path``.

    Raises:
        ValueError: If *output_location* is an http(s) URL.
    """
    if _is_file_uri(output_location):
        return _file_uri_to_path(output_location)
    if _is_http_url(output_location):
        raise ValueError("Output location must be a local file path (or file:// URI), not http(s).")
    return Path(output_location)


def output_location_to_path(output_location: str) -> Path:
    """Public wrapper around ``_output_destination_to_path``."""
    return _output_destination_to_path(output_location)


def build_combined_graph(entry_location: str) -> Graph:
    """Load an RDF document and, recursively, everything it ``owl:imports``.

    *entry_location* may be a file path, a ``file://`` URI, or an http(s) URL.
    All documents are parsed into one in-memory graph. Each location is
    attempted at most once, keyed by its exact string. A location that fails
    to parse is logged at warning level and skipped, so the result may be
    partial (or empty if the entry document itself cannot be loaded).

    Returns:
        The graph containing the triples of every document that loaded.
    """
    combined_graph = Graph()
    visited_locations: set[str] = set()

    def resolve_imports(location: str) -> None:
        if location in visited_locations:
            return
        visited_locations.add(location)

        logger.info("Loading ontology: %s", location)
        try:
            combined_graph.parse(location=location)
        except Exception as e:
            # Best effort: one unreachable import must not abort the whole run.
            logger.warning("Failed to load %s (%s)", location, e)
            return

        # Materialize the list first: the recursive calls below add triples to
        # the same graph, which must not happen while it is being iterated.
        imports = [str(o) for _, _, o in combined_graph.triples((None, OWL.imports, None))]
        for imported_location in imports:
            # Already-visited locations are filtered by the guard at the top.
            resolve_imports(imported_location)

    resolve_imports(entry_location)
    return combined_graph


def serialize_graph_to_ttl(graph: Graph, output_location: str) -> None:
    """Write *graph* as Turtle to *output_location*.

    The graph is serialized to a sibling ``<name>.tmp`` file which is then
    moved over the destination with ``os.replace``, so a failed serialization
    does not overwrite an existing output file. Missing parent directories
    are created. If serialization or the final move fails, the temporary file
    is removed and the original exception propagates.

    Args:
        graph: The graph to serialize.
        output_location: Local file path or ``file://`` URI.

    Raises:
        ValueError: If *output_location* is an http(s) URL.
    """
    output_path = _output_destination_to_path(output_location)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = output_path.with_suffix(output_path.suffix + ".tmp")
    try:
        graph.serialize(destination=str(tmp_path), format="turtle")
        os.replace(str(tmp_path), str(output_path))
    except BaseException:
        # Do not leave a stale staging file behind; the original error is the
        # one the caller needs, so a failed cleanup is deliberately ignored.
        try:
            os.remove(str(tmp_path))
        except OSError:
            pass
        raise