backend: support external SPARQL and named-graph snapshots
This commit is contained in:
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime"
|
||||
@@ -12,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
graphqueries "visualizador_instanciados/backend_go/graph_queries"
|
||||
"visualizador_instanciados/backend_go/queryscope"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -56,29 +56,26 @@ func fetchGraphSnapshot(
|
||||
preds, err := func() (*PredicateDict, error) {
|
||||
logStats("predicates_query_start")
|
||||
predQ := def.PredicateQuery(cfg.IncludeBNodes)
|
||||
t0 := time.Now()
|
||||
rawPred, err := sparql.Query(ctx, predQ)
|
||||
var predRes sparqlBindingsResponse[sparqlPredicateBinding]
|
||||
metrics, err := sparql.QueryJSON(ctx, predQ, &predRes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("predicates query failed: %w", err)
|
||||
}
|
||||
if cfg.LogSnapshotTimings {
|
||||
log.Printf("[snapshot] predicates_query_returned bytes=%d query_time=%s", len(rawPred), time.Since(t0).Truncate(time.Millisecond))
|
||||
}
|
||||
var predRes sparqlResponse
|
||||
t1 := time.Now()
|
||||
if err := json.Unmarshal(rawPred, &predRes); err != nil {
|
||||
return nil, fmt.Errorf("predicates unmarshal failed: %w", err)
|
||||
}
|
||||
if cfg.LogSnapshotTimings {
|
||||
log.Printf("[snapshot] predicates_unmarshal_done bindings=%d unmarshal_time=%s", len(predRes.Results.Bindings), time.Since(t1).Truncate(time.Millisecond))
|
||||
log.Printf(
|
||||
"[snapshot] predicates_query_done bytes=%d bindings=%d round_trip_time=%s decode_time=%s",
|
||||
metrics.ResponseBytes,
|
||||
len(predRes.Results.Bindings),
|
||||
metrics.RoundTripTime.Truncate(time.Millisecond),
|
||||
metrics.BodyDecodeTime.Truncate(time.Millisecond),
|
||||
)
|
||||
}
|
||||
predicateIRIs := make([]string, 0, len(predRes.Results.Bindings))
|
||||
for _, b := range predRes.Results.Bindings {
|
||||
pTerm, ok := b["p"]
|
||||
if !ok || pTerm.Type != "uri" || pTerm.Value == "" {
|
||||
if b.P.Type != "uri" || b.P.Value == "" {
|
||||
continue
|
||||
}
|
||||
predicateIRIs = append(predicateIRIs, pTerm.Value)
|
||||
predicateIRIs = append(predicateIRIs, b.P.Value)
|
||||
}
|
||||
logStats("predicates_dict_built")
|
||||
return NewPredicateDict(predicateIRIs), nil
|
||||
@@ -102,55 +99,48 @@ func fetchGraphSnapshot(
|
||||
}
|
||||
|
||||
logStats(fmt.Sprintf("edges_batch_start batch=%d offset=%d limit=%d", batch, offset, limit))
|
||||
bindings, err := func() ([]map[string]sparqlTerm, error) {
|
||||
edgesQ := def.EdgeQuery(limit, offset, cfg.IncludeBNodes)
|
||||
t0 := time.Now()
|
||||
raw, err := sparql.Query(ctx, edgesQ)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("edges query failed: %w", err)
|
||||
}
|
||||
if cfg.LogSnapshotTimings {
|
||||
log.Printf("[snapshot] edges_batch_query_returned batch=%d offset=%d limit=%d bytes=%d query_time=%s", batch, offset, limit, len(raw), time.Since(t0).Truncate(time.Millisecond))
|
||||
edgesQ := def.EdgeQuery(limit, offset, cfg.IncludeBNodes)
|
||||
var batchConvertTime time.Duration
|
||||
metrics, err := sparql.QueryTripleBindingsStream(ctx, edgesQ, func(binding sparqlTripleBinding) error {
|
||||
if !cfg.LogSnapshotTimings {
|
||||
acc.addTripleBinding(binding)
|
||||
return nil
|
||||
}
|
||||
|
||||
var res sparqlResponse
|
||||
t1 := time.Now()
|
||||
if err := json.Unmarshal(raw, &res); err != nil {
|
||||
return nil, fmt.Errorf("edges unmarshal failed: %w", err)
|
||||
}
|
||||
if cfg.LogSnapshotTimings {
|
||||
log.Printf("[snapshot] edges_batch_unmarshal_done batch=%d bindings=%d unmarshal_time=%s", batch, len(res.Results.Bindings), time.Since(t1).Truncate(time.Millisecond))
|
||||
}
|
||||
return res.Results.Bindings, nil
|
||||
}()
|
||||
convertStart := time.Now()
|
||||
acc.addTripleBinding(binding)
|
||||
batchConvertTime += time.Since(convertStart)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return GraphResponse{}, fmt.Errorf("edges batch=%d offset=%d limit=%d: %w", batch, offset, limit, err)
|
||||
}
|
||||
|
||||
got := len(bindings)
|
||||
got := metrics.BindingCount
|
||||
totalBindings += got
|
||||
if got == 0 {
|
||||
bindings = nil
|
||||
logStats(fmt.Sprintf("edges_batch_done_empty batch=%d offset=%d", batch, offset))
|
||||
break
|
||||
}
|
||||
|
||||
convT0 := time.Now()
|
||||
acc.addBindings(bindings)
|
||||
if cfg.LogSnapshotTimings {
|
||||
log.Printf(
|
||||
"[snapshot] edges_batch_convert_done batch=%d got_bindings=%d total_bindings=%d nodes=%d edges=%d convert_time=%s",
|
||||
"[snapshot] edges_batch_stream_done batch=%d offset=%d limit=%d bytes=%d got_bindings=%d total_bindings=%d round_trip_time=%s stream_time=%s decode_overhead_time=%s convert_time=%s nodes=%d edges=%d",
|
||||
batch,
|
||||
offset,
|
||||
limit,
|
||||
metrics.ResponseBytes,
|
||||
got,
|
||||
totalBindings,
|
||||
metrics.RoundTripTime.Truncate(time.Millisecond),
|
||||
metrics.BodyDecodeTime.Truncate(time.Millisecond),
|
||||
maxDuration(metrics.BodyDecodeTime-batchConvertTime, 0).Truncate(time.Millisecond),
|
||||
batchConvertTime.Truncate(time.Millisecond),
|
||||
len(acc.nodes),
|
||||
len(acc.edges),
|
||||
time.Since(convT0).Truncate(time.Millisecond),
|
||||
)
|
||||
}
|
||||
|
||||
// Make the batch eligible for GC.
|
||||
bindings = nil
|
||||
logStats(fmt.Sprintf("edges_batch_done batch=%d offset=%d", batch, offset))
|
||||
if cfg.FreeOSMemoryAfterSnapshot {
|
||||
debug.FreeOSMemory()
|
||||
@@ -165,6 +155,13 @@ func fetchGraphSnapshot(
|
||||
log.Printf("[snapshot] convert_batches_done total_bindings=%d total_time=%s", totalBindings, time.Since(convAllT0).Truncate(time.Millisecond))
|
||||
}
|
||||
logStats("edges_batched_done")
|
||||
if totalBindings == 0 {
|
||||
log.Printf(
|
||||
"[snapshot] empty_graph_result graph_query_id=%s endpoint=%s hint=app-generated reads now query named graphs only with GRAPH ?g; verify expected triples are present in named graphs and match the graph query shape",
|
||||
graphQueryID,
|
||||
cfg.EffectiveSparqlEndpoint(),
|
||||
)
|
||||
}
|
||||
|
||||
nodes := acc.nodes
|
||||
edges := acc.edges
|
||||
@@ -283,43 +280,26 @@ func fetchRDFSLabels(
|
||||
}
|
||||
batch := iris[i:end]
|
||||
|
||||
values := make([]string, 0, len(batch))
|
||||
for _, u := range batch {
|
||||
values = append(values, "<"+u+">")
|
||||
}
|
||||
q := rdfsLabelQuery(batch)
|
||||
|
||||
q := fmt.Sprintf(`
|
||||
SELECT ?s ?label
|
||||
WHERE {
|
||||
VALUES ?s { %s }
|
||||
?s <%s> ?label .
|
||||
}
|
||||
`, strings.Join(values, " "), rdfsLabelIRI)
|
||||
|
||||
raw, err := sparql.Query(ctx, q)
|
||||
var res sparqlBindingsResponse[sparqlLabelBinding]
|
||||
_, err := sparql.QueryJSON(ctx, q, &res)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var res sparqlResponse
|
||||
if err := json.Unmarshal(raw, &res); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse SPARQL JSON: %w", err)
|
||||
}
|
||||
|
||||
for _, b := range res.Results.Bindings {
|
||||
sTerm, ok := b["s"]
|
||||
if !ok || sTerm.Value == "" {
|
||||
if b.S.Value == "" {
|
||||
continue
|
||||
}
|
||||
lblTerm, ok := b["label"]
|
||||
if !ok || lblTerm.Type != "literal" || lblTerm.Value == "" {
|
||||
if b.Label.Type != "literal" || b.Label.Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
score := labelScore(lblTerm.Lang)
|
||||
prev, ok := best[sTerm.Value]
|
||||
score := labelScore(b.Label.Lang)
|
||||
prev, ok := best[b.S.Value]
|
||||
if !ok || score > prev.score {
|
||||
best[sTerm.Value] = bestLabel{score: score, value: lblTerm.Value}
|
||||
best[b.S.Value] = bestLabel{score: score, value: b.Label.Value}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -331,6 +311,35 @@ WHERE {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func rdfsLabelQuery(iris []string) string {
|
||||
if len(iris) == 0 {
|
||||
return "SELECT ?s ?label WHERE { FILTER(false) }"
|
||||
}
|
||||
|
||||
values := make([]string, 0, len(iris))
|
||||
for _, u := range iris {
|
||||
if strings.TrimSpace(u) == "" {
|
||||
continue
|
||||
}
|
||||
values = append(values, "<"+u+">")
|
||||
}
|
||||
if len(values) == 0 {
|
||||
return "SELECT ?s ?label WHERE { FILTER(false) }"
|
||||
}
|
||||
|
||||
pattern := queryscope.NamedGraph(fmt.Sprintf(`
|
||||
VALUES ?s { %s }
|
||||
?s <%s> ?label .
|
||||
`, strings.Join(values, " "), rdfsLabelIRI))
|
||||
|
||||
return fmt.Sprintf(`
|
||||
SELECT DISTINCT ?s ?label
|
||||
WHERE {
|
||||
%s
|
||||
}
|
||||
`, pattern)
|
||||
}
|
||||
|
||||
func labelScore(lang string) int {
|
||||
lang = strings.ToLower(strings.TrimSpace(lang))
|
||||
if lang == "en" {
|
||||
@@ -357,3 +366,10 @@ func sortIntsUnique(xs []int) []int {
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// maxDuration returns the larger of the two durations; when they are equal
// that shared value is returned.
func maxDuration(a, b time.Duration) time.Duration {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
Reference in New Issue
Block a user