backend: support external SPARQL and named-graph snapshots

This commit is contained in:
Oxy8
2026-04-06 13:36:08 -03:00
parent 696844f341
commit 44c1d3eaa6
25 changed files with 1695 additions and 243 deletions

View File

@@ -2,7 +2,6 @@ package main
import (
"context"
"encoding/json"
"fmt"
"log"
"runtime"
@@ -12,6 +11,7 @@ import (
"time"
graphqueries "visualizador_instanciados/backend_go/graph_queries"
"visualizador_instanciados/backend_go/queryscope"
)
const (
@@ -56,29 +56,26 @@ func fetchGraphSnapshot(
preds, err := func() (*PredicateDict, error) {
logStats("predicates_query_start")
predQ := def.PredicateQuery(cfg.IncludeBNodes)
t0 := time.Now()
rawPred, err := sparql.Query(ctx, predQ)
var predRes sparqlBindingsResponse[sparqlPredicateBinding]
metrics, err := sparql.QueryJSON(ctx, predQ, &predRes)
if err != nil {
return nil, fmt.Errorf("predicates query failed: %w", err)
}
if cfg.LogSnapshotTimings {
log.Printf("[snapshot] predicates_query_returned bytes=%d query_time=%s", len(rawPred), time.Since(t0).Truncate(time.Millisecond))
}
var predRes sparqlResponse
t1 := time.Now()
if err := json.Unmarshal(rawPred, &predRes); err != nil {
return nil, fmt.Errorf("predicates unmarshal failed: %w", err)
}
if cfg.LogSnapshotTimings {
log.Printf("[snapshot] predicates_unmarshal_done bindings=%d unmarshal_time=%s", len(predRes.Results.Bindings), time.Since(t1).Truncate(time.Millisecond))
log.Printf(
"[snapshot] predicates_query_done bytes=%d bindings=%d round_trip_time=%s decode_time=%s",
metrics.ResponseBytes,
len(predRes.Results.Bindings),
metrics.RoundTripTime.Truncate(time.Millisecond),
metrics.BodyDecodeTime.Truncate(time.Millisecond),
)
}
predicateIRIs := make([]string, 0, len(predRes.Results.Bindings))
for _, b := range predRes.Results.Bindings {
pTerm, ok := b["p"]
if !ok || pTerm.Type != "uri" || pTerm.Value == "" {
if b.P.Type != "uri" || b.P.Value == "" {
continue
}
predicateIRIs = append(predicateIRIs, pTerm.Value)
predicateIRIs = append(predicateIRIs, b.P.Value)
}
logStats("predicates_dict_built")
return NewPredicateDict(predicateIRIs), nil
@@ -102,55 +99,48 @@ func fetchGraphSnapshot(
}
logStats(fmt.Sprintf("edges_batch_start batch=%d offset=%d limit=%d", batch, offset, limit))
bindings, err := func() ([]map[string]sparqlTerm, error) {
edgesQ := def.EdgeQuery(limit, offset, cfg.IncludeBNodes)
t0 := time.Now()
raw, err := sparql.Query(ctx, edgesQ)
if err != nil {
return nil, fmt.Errorf("edges query failed: %w", err)
}
if cfg.LogSnapshotTimings {
log.Printf("[snapshot] edges_batch_query_returned batch=%d offset=%d limit=%d bytes=%d query_time=%s", batch, offset, limit, len(raw), time.Since(t0).Truncate(time.Millisecond))
edgesQ := def.EdgeQuery(limit, offset, cfg.IncludeBNodes)
var batchConvertTime time.Duration
metrics, err := sparql.QueryTripleBindingsStream(ctx, edgesQ, func(binding sparqlTripleBinding) error {
if !cfg.LogSnapshotTimings {
acc.addTripleBinding(binding)
return nil
}
var res sparqlResponse
t1 := time.Now()
if err := json.Unmarshal(raw, &res); err != nil {
return nil, fmt.Errorf("edges unmarshal failed: %w", err)
}
if cfg.LogSnapshotTimings {
log.Printf("[snapshot] edges_batch_unmarshal_done batch=%d bindings=%d unmarshal_time=%s", batch, len(res.Results.Bindings), time.Since(t1).Truncate(time.Millisecond))
}
return res.Results.Bindings, nil
}()
convertStart := time.Now()
acc.addTripleBinding(binding)
batchConvertTime += time.Since(convertStart)
return nil
})
if err != nil {
return GraphResponse{}, fmt.Errorf("edges batch=%d offset=%d limit=%d: %w", batch, offset, limit, err)
}
got := len(bindings)
got := metrics.BindingCount
totalBindings += got
if got == 0 {
bindings = nil
logStats(fmt.Sprintf("edges_batch_done_empty batch=%d offset=%d", batch, offset))
break
}
convT0 := time.Now()
acc.addBindings(bindings)
if cfg.LogSnapshotTimings {
log.Printf(
"[snapshot] edges_batch_convert_done batch=%d got_bindings=%d total_bindings=%d nodes=%d edges=%d convert_time=%s",
"[snapshot] edges_batch_stream_done batch=%d offset=%d limit=%d bytes=%d got_bindings=%d total_bindings=%d round_trip_time=%s stream_time=%s decode_overhead_time=%s convert_time=%s nodes=%d edges=%d",
batch,
offset,
limit,
metrics.ResponseBytes,
got,
totalBindings,
metrics.RoundTripTime.Truncate(time.Millisecond),
metrics.BodyDecodeTime.Truncate(time.Millisecond),
maxDuration(metrics.BodyDecodeTime-batchConvertTime, 0).Truncate(time.Millisecond),
batchConvertTime.Truncate(time.Millisecond),
len(acc.nodes),
len(acc.edges),
time.Since(convT0).Truncate(time.Millisecond),
)
}
// Make the batch eligible for GC.
bindings = nil
logStats(fmt.Sprintf("edges_batch_done batch=%d offset=%d", batch, offset))
if cfg.FreeOSMemoryAfterSnapshot {
debug.FreeOSMemory()
@@ -165,6 +155,13 @@ func fetchGraphSnapshot(
log.Printf("[snapshot] convert_batches_done total_bindings=%d total_time=%s", totalBindings, time.Since(convAllT0).Truncate(time.Millisecond))
}
logStats("edges_batched_done")
if totalBindings == 0 {
log.Printf(
"[snapshot] empty_graph_result graph_query_id=%s endpoint=%s hint=app-generated reads now query named graphs only with GRAPH ?g; verify expected triples are present in named graphs and match the graph query shape",
graphQueryID,
cfg.EffectiveSparqlEndpoint(),
)
}
nodes := acc.nodes
edges := acc.edges
@@ -283,43 +280,26 @@ func fetchRDFSLabels(
}
batch := iris[i:end]
values := make([]string, 0, len(batch))
for _, u := range batch {
values = append(values, "<"+u+">")
}
q := rdfsLabelQuery(batch)
q := fmt.Sprintf(`
SELECT ?s ?label
WHERE {
VALUES ?s { %s }
?s <%s> ?label .
}
`, strings.Join(values, " "), rdfsLabelIRI)
raw, err := sparql.Query(ctx, q)
var res sparqlBindingsResponse[sparqlLabelBinding]
_, err := sparql.QueryJSON(ctx, q, &res)
if err != nil {
return nil, err
}
var res sparqlResponse
if err := json.Unmarshal(raw, &res); err != nil {
return nil, fmt.Errorf("failed to parse SPARQL JSON: %w", err)
}
for _, b := range res.Results.Bindings {
sTerm, ok := b["s"]
if !ok || sTerm.Value == "" {
if b.S.Value == "" {
continue
}
lblTerm, ok := b["label"]
if !ok || lblTerm.Type != "literal" || lblTerm.Value == "" {
if b.Label.Type != "literal" || b.Label.Value == "" {
continue
}
score := labelScore(lblTerm.Lang)
prev, ok := best[sTerm.Value]
score := labelScore(b.Label.Lang)
prev, ok := best[b.S.Value]
if !ok || score > prev.score {
best[sTerm.Value] = bestLabel{score: score, value: lblTerm.Value}
best[b.S.Value] = bestLabel{score: score, value: b.Label.Value}
}
}
}
@@ -331,6 +311,35 @@ WHERE {
return out, nil
}
// rdfsLabelQuery builds a SPARQL query that looks up rdfs:label values for
// the supplied IRIs, with the triple pattern scoped to named graphs via
// queryscope.NamedGraph. Blank or whitespace-only IRIs are dropped; when no
// usable IRI remains, a query that can never match (FILTER(false)) is
// returned so callers still receive valid SPARQL.
func rdfsLabelQuery(iris []string) string {
	const matchNothing = "SELECT ?s ?label WHERE { FILTER(false) }"
	wrapped := make([]string, 0, len(iris))
	for _, iri := range iris {
		// Skip entries that would produce an empty <> term.
		if strings.TrimSpace(iri) != "" {
			wrapped = append(wrapped, "<"+iri+">")
		}
	}
	if len(wrapped) == 0 {
		return matchNothing
	}
	body := queryscope.NamedGraph(fmt.Sprintf(`
VALUES ?s { %s }
?s <%s> ?label .
`, strings.Join(wrapped, " "), rdfsLabelIRI))
	return fmt.Sprintf(`
SELECT DISTINCT ?s ?label
WHERE {
%s
}
`, body)
}
func labelScore(lang string) int {
lang = strings.ToLower(strings.TrimSpace(lang))
if lang == "en" {
@@ -357,3 +366,10 @@ func sortIntsUnique(xs []int) []int {
}
return out
}
func maxDuration(a time.Duration, b time.Duration) time.Duration {
if a > b {
return a
}
return b
}