package main import ( "context" "fmt" "log" "runtime" "runtime/debug" "sort" "strings" "time" graphqueries "visualizador_instanciados/backend_go/graph_queries" "visualizador_instanciados/backend_go/queryscope" ) const ( rdfsLabelIRI = "http://www.w3.org/2000/01/rdf-schema#label" ) func fetchGraphSnapshot( ctx context.Context, sparql *AnzoGraphClient, cfg Config, nodeLimit int, edgeLimit int, graphQueryID string, ) (GraphResponse, error) { start := time.Now() logStats := func(stage string) { if !cfg.LogSnapshotTimings { return } var ms runtime.MemStats runtime.ReadMemStats(&ms) log.Printf( "[snapshot] %s graph_query_id=%s node_limit=%d edge_limit=%d elapsed=%s alloc=%dMB heap_inuse=%dMB sys=%dMB numgc=%d", stage, graphQueryID, nodeLimit, edgeLimit, time.Since(start).Truncate(time.Millisecond), ms.Alloc/1024/1024, ms.HeapInuse/1024/1024, ms.Sys/1024/1024, ms.NumGC, ) } def, ok := graphqueries.Get(graphQueryID) if !ok { return GraphResponse{}, fmt.Errorf("unknown graph_query_id: %s", graphQueryID) } // Build predicate dictionary (predicate IRI -> uint32 ID) before fetching edges. preds, err := func() (*PredicateDict, error) { logStats("predicates_query_start") predQ := def.PredicateQuery(cfg.IncludeBNodes) var predRes sparqlBindingsResponse[sparqlPredicateBinding] metrics, err := sparql.QueryJSON(ctx, predQ, &predRes) if err != nil { return nil, fmt.Errorf("predicates query failed: %w", err) } if cfg.LogSnapshotTimings { log.Printf( "[snapshot] predicates_query_done bytes=%d bindings=%d round_trip_time=%s decode_time=%s", metrics.ResponseBytes, len(predRes.Results.Bindings), metrics.RoundTripTime.Truncate(time.Millisecond), metrics.BodyDecodeTime.Truncate(time.Millisecond), ) } predicateIRIs := make([]string, 0, len(predRes.Results.Bindings)) for _, b := range predRes.Results.Bindings { if b.P.Type != "uri" || b.P.Value == "" { continue } predicateIRIs = append(predicateIRIs, b.P.Value) } logStats("predicates_dict_built") return NewPredicateDict(predicateIRIs), nil }() if err != nil { return GraphResponse{}, err } // Fetch edges in batches to avoid decoding a single huge SPARQL JSON response. logStats("edges_batched_start") batchSize := cfg.EdgeBatchSize acc := newGraphAccumulator(nodeLimit, cfg.IncludeBNodes, min(edgeLimit, batchSize), preds) totalBindings := 0 convAllT0 := time.Now() for batch, offset := 0, 0; offset < edgeLimit; batch, offset = batch+1, offset+batchSize { limit := batchSize remaining := edgeLimit - offset if remaining < limit { limit = remaining } logStats(fmt.Sprintf("edges_batch_start batch=%d offset=%d limit=%d", batch, offset, limit)) edgesQ := def.EdgeQuery(limit, offset, cfg.IncludeBNodes) var batchConvertTime time.Duration metrics, err := sparql.QueryTripleBindingsStream(ctx, edgesQ, func(binding sparqlTripleBinding) error { if !cfg.LogSnapshotTimings { acc.addTripleBinding(binding) return nil } convertStart := time.Now() acc.addTripleBinding(binding) batchConvertTime += time.Since(convertStart) return nil }) if err != nil { return GraphResponse{}, fmt.Errorf("edges batch=%d offset=%d limit=%d: %w", batch, offset, limit, err) } got := metrics.BindingCount totalBindings += got if got == 0 { logStats(fmt.Sprintf("edges_batch_done_empty batch=%d offset=%d", batch, offset)) break } if cfg.LogSnapshotTimings { log.Printf( "[snapshot] edges_batch_stream_done batch=%d offset=%d limit=%d bytes=%d got_bindings=%d total_bindings=%d round_trip_time=%s stream_time=%s decode_overhead_time=%s convert_time=%s nodes=%d edges=%d", batch, offset, limit, metrics.ResponseBytes, got, totalBindings, metrics.RoundTripTime.Truncate(time.Millisecond), metrics.BodyDecodeTime.Truncate(time.Millisecond), maxDuration(metrics.BodyDecodeTime-batchConvertTime, 0).Truncate(time.Millisecond), batchConvertTime.Truncate(time.Millisecond), len(acc.nodes), len(acc.edges), ) } logStats(fmt.Sprintf("edges_batch_done batch=%d offset=%d", batch, offset)) if cfg.FreeOSMemoryAfterSnapshot { debug.FreeOSMemory() logStats(fmt.Sprintf("edges_batch_free_os_memory_done batch=%d offset=%d", batch, offset)) } if got < limit { break } } if cfg.LogSnapshotTimings { log.Printf("[snapshot] convert_batches_done total_bindings=%d total_time=%s", totalBindings, time.Since(convAllT0).Truncate(time.Millisecond)) } logStats("edges_batched_done") if totalBindings == 0 { log.Printf( "[snapshot] empty_graph_result graph_query_id=%s endpoint=%s hint=app-generated reads now query named graphs only with GRAPH ?g; verify expected triples are present in named graphs and match the graph query shape", graphQueryID, cfg.EffectiveSparqlEndpoint(), ) } nodes := acc.nodes edges := acc.edges routeSegments := []RouteSegment(nil) layoutEngine := "go" var layoutRootIRI *string if shouldUseRustHierarchyLayout(cfg, graphQueryID) { layoutResult, err := layoutHierarchyWithRust(ctx, cfg, nodes, edges, preds) if err != nil { return GraphResponse{}, err } nodes = layoutResult.Nodes edges = layoutResult.Edges routeSegments = layoutResult.RouteSegments layoutEngine = rustHierarchyLayoutEngineID rootIRI := cfg.HierarchyLayoutRootIRI layoutRootIRI = &rootIRI } else { // Layout: invert edges for hierarchy (target -> source). hierEdges := make([][2]int, 0, len(edges)) for _, e := range edges { hierEdges = append(hierEdges, [2]int{int(e.Target), int(e.Source)}) } layers, cycleErr := levelSynchronousKahnLayers(len(nodes), hierEdges) if cycleErr != nil { sample := make([]string, 0, 20) for _, nid := range cycleErr.RemainingNodeIDs { if len(sample) >= 20 { break } if nid >= 0 && nid < len(nodes) { sample = append(sample, nodes[nid].IRI) } } cycleErr.RemainingIRISample = sample return GraphResponse{}, cycleErr } idToIRI := make([]string, len(nodes)) for i := range nodes { idToIRI[i] = nodes[i].IRI } for _, layer := range layers { sortLayerByIRI(layer, idToIRI) } xs, ys := radialPositionsFromLayers(len(nodes), layers, 5000.0) for i := range nodes { nodes[i].X = xs[i] nodes[i].Y = ys[i] } } // Attach labels for URI nodes. iris := make([]string, 0) for _, n := range nodes { if n.TermType == "uri" && n.IRI != "" { iris = append(iris, n.IRI) } } if len(iris) > 0 { labelByIRI, err := fetchRDFSLabels(ctx, sparql, iris, 500) if err != nil { return GraphResponse{}, fmt.Errorf("fetch rdfs:label failed: %w", err) } for i := range nodes { if nodes[i].TermType != "uri" { continue } lbl, ok := labelByIRI[nodes[i].IRI] if !ok { continue } val := lbl nodes[i].Label = &val } } meta := &GraphMeta{ Backend: "anzograph", TTLPath: nil, SparqlEndpoint: cfg.EffectiveSparqlEndpoint(), IncludeBNodes: cfg.IncludeBNodes, GraphQueryID: graphQueryID, Predicates: preds.IRIs(), NodeLimit: nodeLimit, EdgeLimit: edgeLimit, Nodes: len(nodes), Edges: len(edges), LayoutEngine: layoutEngine, LayoutRootIRI: layoutRootIRI, } return GraphResponse{Nodes: nodes, Edges: edges, RouteSegments: routeSegments, Meta: meta}, nil } type bestLabel struct { score int value string } func fetchRDFSLabels( ctx context.Context, sparql *AnzoGraphClient, iris []string, batchSize int, ) (map[string]string, error) { best := make(map[string]bestLabel) for i := 0; i < len(iris); i += batchSize { end := i + batchSize if end > len(iris) { end = len(iris) } batch := iris[i:end] q := rdfsLabelQuery(batch) var res sparqlBindingsResponse[sparqlLabelBinding] _, err := sparql.QueryJSON(ctx, q, &res) if err != nil { return nil, err } for _, b := range res.Results.Bindings { if b.S.Value == "" { continue } if b.Label.Type != "literal" || b.Label.Value == "" { continue } score := labelScore(b.Label.Lang) prev, ok := best[b.S.Value] if !ok || score > prev.score { best[b.S.Value] = bestLabel{score: score, value: b.Label.Value} } } } out := make(map[string]string, len(best)) for iri, v := range best { out[iri] = v.value } return out, nil } func rdfsLabelQuery(iris []string) string { if len(iris) == 0 { return "SELECT ?s ?label WHERE { FILTER(false) }" } values := make([]string, 0, len(iris)) for _, u := range iris { if strings.TrimSpace(u) == "" { continue } values = append(values, "<"+u+">") } if len(values) == 0 { return "SELECT ?s ?label WHERE { FILTER(false) }" } pattern := queryscope.NamedGraph(fmt.Sprintf(` VALUES ?s { %s } ?s <%s> ?label . `, strings.Join(values, " "), rdfsLabelIRI)) return fmt.Sprintf(` SELECT DISTINCT ?s ?label WHERE { %s } `, pattern) } func labelScore(lang string) int { lang = strings.ToLower(strings.TrimSpace(lang)) if lang == "en" { return 3 } if lang == "" { return 2 } return 1 } func sortIntsUnique(xs []int) []int { if len(xs) == 0 { return xs } sort.Ints(xs) out := xs[:0] var last int for i, v := range xs { if i == 0 || v != last { out = append(out, v) } last = v } return out } func maxDuration(a time.Duration, b time.Duration) time.Duration { if a > b { return a } return b }