added multi-term search

This commit is contained in:
gulimabr
2026-01-28 16:27:34 -03:00
parent 1461168af6
commit 5abcad4c43
2 changed files with 327 additions and 92 deletions

View File

@@ -14,7 +14,7 @@ import httpx
import structlog
from asgi_correlation_id import CorrelationIdMiddleware
from asgi_correlation_id.context import correlation_id
from fastapi import FastAPI, Query, Response
from fastapi import FastAPI, HTTPException, Query, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from selectolax.parser import HTMLParser
@@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel):
taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
class BulkTermMeta(BaseModel):
    """Per-term counters reported alongside a bulk lookup result."""

    # Number of definitions returned for the term.
    definitions_count: int
    # Number of taxonomy matches returned for the term.
    taxonomy_count: int
class BulkTermResult(BaseModel):
    """Outcome of looking up a single term as part of a bulk request."""

    # The term this entry was built for.
    term: str
    # Definitions found for the term; defaults to an empty list.
    results: List[Definition] = Field(default_factory=list)
    # Taxonomy matches found for the term; defaults to an empty list.
    taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
    # Counters for this entry (build_bulk_term_result fills them with the
    # lengths of ``results`` and ``taxonomy``).
    meta: BulkTermMeta
    # Failure message for this term; None when no error was recorded.
    error: Optional[str] = None
class BulkDefinitionRequest(BaseModel):
    """Request body accepted by the bulk definitions endpoint."""

    # Terms exactly as submitted by the client; the endpoint passes them
    # through normalize_terms before use.
    terms: List[str]
class BulkDefinitionResponse(BaseModel):
    """Response body returned by the bulk definitions endpoint."""

    # Normalized terms, in the order they were looked up.
    terms: List[str]
    # Per-term results keyed by the normalized term.
    results: dict[str, BulkTermResult]
    # Correlation id of the request, when one is available.
    request_id: Optional[str] = None
app = FastAPI(title="TermSearch API", version="0.1.0")
logging.basicConfig(format="%(message)s", level=logging.INFO)
@@ -79,6 +102,21 @@ def normalize_text(text: str) -> str:
return " ".join(text.lower().split())
# Maximum number of distinct (post-normalization) terms the bulk endpoint accepts.
MAX_BULK_TERMS = 5
def normalize_terms(raw_terms: List[str]) -> List[str]:
    """Strip every term, drop blank entries, and remove exact duplicates.

    The first occurrence of each term is kept, so the relative order of the
    input is preserved. Comparison is case-sensitive and happens after
    stripping surrounding whitespace.
    """
    stripped = (candidate.strip() for candidate in raw_terms)
    # dict keys keep insertion order, which gives an order-stable de-duplication.
    unique = dict.fromkeys(cleaned for cleaned in stripped if cleaned)
    return list(unique)
@lru_cache(maxsize=1)
def load_taxonomy() -> dict:
root_dir = Path(__file__).resolve().parents[1]
@@ -486,3 +524,73 @@ async def get_definitions(
request_id=request_id,
taxonomy=taxonomy,
)
async def build_bulk_term_result(term: str) -> BulkTermResult:
    """Look up one term across every definition source and the taxonomy.

    The three scrapers run concurrently. A scraper that raises is logged and
    skipped, so definitions from the remaining sources are still returned.
    The term is reported as failed only when every scraper raised, or when
    another step of the lookup raised.

    Args:
        term: The (already normalized) term to look up.

    Returns:
        A ``BulkTermResult`` for ``term``. Ordinary exceptions are not
        propagated; a failure is reported through the ``error`` field so one
        bad term does not fail the whole bulk request.
    """
    try:
        # return_exceptions=True keeps one failing source from discarding the
        # results the other sources produced.
        outcomes = await asyncio.gather(
            scrape_dicionario_first(term),
            scrape_slb_first(term),
            scrape_merriam_first(term),
            return_exceptions=True,
        )
        failures = [
            outcome for outcome in outcomes if isinstance(outcome, BaseException)
        ]
        for failure in failures:
            logger.warning(
                "bulk_term_source_failed",
                term=term,
                request_id=correlation_id.get(),
                error=str(failure),
            )
        if len(failures) == len(outcomes):
            # No source succeeded: hand over to the error path below.
            raise failures[0]

        results = [
            outcome
            for outcome in outcomes
            if not isinstance(outcome, BaseException) and outcome
        ]
        taxonomy = find_taxonomy_matches(term)
        meta = BulkTermMeta(
            definitions_count=len(results),
            taxonomy_count=len(taxonomy),
        )
        return BulkTermResult(
            term=term,
            results=results,
            taxonomy=taxonomy,
            meta=meta,
        )
    except Exception as exc:  # noqa: BLE001
        logger.exception(
            "bulk_term_failed",
            term=term,
            request_id=correlation_id.get(),
            error=str(exc),
        )
        meta = BulkTermMeta(definitions_count=0, taxonomy_count=0)
        return BulkTermResult(
            term=term,
            results=[],
            taxonomy=[],
            meta=meta,
            error="Failed to fetch definitions for this term.",
        )
@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse)
async def get_definitions_bulk(
    payload: BulkDefinitionRequest,
    response: Response,
) -> BulkDefinitionResponse:
    """Resolve definitions and taxonomy matches for several terms at once.

    The submitted terms are normalized first. More than ``MAX_BULK_TERMS``
    distinct terms is rejected with HTTP 400, and an empty list yields an
    empty response. Otherwise every term is looked up concurrently and the
    results are returned keyed by term.
    """
    request_id = correlation_id.get()
    if request_id:
        # Echo the correlation id back to the client.
        response.headers["X-Request-ID"] = request_id

    terms = normalize_terms(payload.terms)
    term_count = len(terms)
    if term_count > MAX_BULK_TERMS:
        too_many = f"Maximum of {MAX_BULK_TERMS} terms allowed."
        raise HTTPException(status_code=400, detail=too_many)
    if term_count == 0:
        return BulkDefinitionResponse(terms=[], results={}, request_id=request_id)

    lookups = [build_bulk_term_result(term) for term in terms]
    results_by_term = {}
    for entry in await asyncio.gather(*lookups):
        results_by_term[entry.term] = entry

    return BulkDefinitionResponse(
        terms=terms,
        results=results_by_term,
        request_id=request_id,
    )

View File

@@ -7,10 +7,23 @@ type Definition = {
definition: string;
};
type DefinitionResponse = {
/** Per-term counters included with each entry of a bulk definitions response. */
type BulkTermMeta = {
  /** Number of definitions reported for the term. */
  definitions_count: number;
  /** Number of taxonomy matches reported for the term. */
  taxonomy_count: number;
};
/** Result of looking up a single term in a bulk search. */
type BulkTermResult = {
  /** The term this entry belongs to. */
  term: string;
  /** Definitions found for the term. */
  results: Definition[];
  /** Taxonomy matches for the term; may be absent. */
  taxonomy?: TaxonomyMatch[];
  /** Counters for this entry. */
  meta: BulkTermMeta;
  /** Failure message for this term; absent or null when no error was recorded. */
  error?: string | null;
};
/** Shape of the JSON body returned by the bulk definitions endpoint. */
type BulkDefinitionResponse = {
  /** Terms covered by the response; used as the display order. */
  terms: string[];
  /** Per-term results keyed by term. */
  results: Record<string, BulkTermResult>;
  /** Request identifier reported by the server, if any. */
  request_id?: string | null;
};
type TaxonomyMatch = {
@@ -26,20 +39,52 @@ type TaxonomyMatch = {
// Base URL used to build API request URLs; falls back to
// "http://localhost:8000" when VITE_API_BASE_URL is unset or empty.
const API_BASE_URL =
  import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000";
// Maximum number of distinct terms that may be submitted in one search.
// NOTE(review): presumably must match the server-side limit — confirm.
const MAX_TERMS = 5;
export default function App() {
const [term, setTerm] = useState("");
const [results, setResults] = useState<Definition[]>([]);
const [taxonomy, setTaxonomy] = useState<TaxonomyMatch[]>([]);
const [termInput, setTermInput] = useState("");
const [resultsByTerm, setResultsByTerm] = useState<
Record<string, BulkTermResult>
>({});
const [orderedTerms, setOrderedTerms] = useState<string[]>([]);
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const canSearch = term.trim().length > 0 && !loading;
const parsedTerms = useMemo(() => {
const terms: string[] = [];
const seen = new Set<string>();
for (const line of termInput.split(/\r?\n/)) {
const trimmed = line.trim();
if (!trimmed || seen.has(trimmed)) continue;
terms.push(trimmed);
seen.add(trimmed);
}
return terms;
}, [termInput]);
const hasTooManyTerms = parsedTerms.length > MAX_TERMS;
const canSearch = parsedTerms.length > 0 && !hasTooManyTerms && !loading;
const apiUrl = useMemo(() => {
const url = new URL("/api/definitions", API_BASE_URL);
url.searchParams.set("term", term.trim());
const url = new URL("/api/definitions/bulk", API_BASE_URL);
return url.toString();
}, [term]);
}, [API_BASE_URL]);
const summary = useMemo(() => {
let definitions = 0;
let taxonomy = 0;
let failed = 0;
for (const term of orderedTerms) {
const item = resultsByTerm[term];
if (!item) continue;
if (item.error) failed += 1;
definitions += item.meta?.definitions_count ?? item.results?.length ?? 0;
taxonomy += item.meta?.taxonomy_count ?? item.taxonomy?.length ?? 0;
}
return { definitions, taxonomy, failed };
}, [orderedTerms, resultsByTerm]);
const handleSearch = async (event: React.FormEvent) => {
event.preventDefault();
@@ -50,15 +95,32 @@ export default function App() {
setError(null);
try {
const response = await fetch(apiUrl);
const response = await fetch(apiUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ terms: parsedTerms }),
});
if (!response.ok) {
throw new Error("Failed to fetch definitions.");
let message = "Failed to fetch definitions.";
try {
const payload = (await response.json()) as { detail?: string };
if (payload?.detail) {
message = payload.detail;
}
} catch {
// ignore JSON parsing errors
}
throw new Error(message);
}
const data = (await response.json()) as DefinitionResponse;
setResults(data.results ?? []);
setTaxonomy(data.taxonomy ?? []);
const data = (await response.json()) as BulkDefinitionResponse;
setOrderedTerms(data.terms ?? parsedTerms);
setResultsByTerm(data.results ?? {});
} catch (err) {
setError(err instanceof Error ? err.message : "Something went wrong.");
setOrderedTerms([]);
setResultsByTerm({});
} finally {
setLoading(false);
}
@@ -83,17 +145,28 @@ export default function App() {
onSubmit={handleSearch}
className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm"
>
<label className="text-sm font-medium text-slate-700" htmlFor="term">
Search term
</label>
<div className="flex items-center justify-between">
<label className="text-sm font-medium text-slate-700" htmlFor="terms">
Search terms (one per line)
</label>
<span
className={`text-xs ${
hasTooManyTerms ? "text-rose-600" : "text-slate-500"
}`}
>
{parsedTerms.length}/{MAX_TERMS} terms
</span>
</div>
<div className="flex flex-col gap-3 sm:flex-row">
<input
id="term"
name="term"
type="text"
value={term}
onChange={(event) => setTerm(event.target.value)}
placeholder="Ex: gas lift"
<textarea
id="terms"
name="terms"
rows={4}
value={termInput}
onChange={(
event: React.ChangeEvent<HTMLTextAreaElement>
) => setTermInput(event.target.value)}
placeholder={`Ex:\ngas lift\npump\nflow assurance`}
className="flex-1 rounded-xl border border-slate-200 px-4 py-3 text-base focus:border-sky-500 focus:outline-none focus:ring-2 focus:ring-sky-200"
/>
<button
@@ -104,16 +177,28 @@ export default function App() {
{loading ? "Searching..." : "Search"}
</button>
</div>
{hasTooManyTerms ? (
<p className="text-xs text-rose-600">
Maximum of {MAX_TERMS} terms allowed. Please remove extra terms to
continue.
</p>
) : (
<p className="text-xs text-slate-500">
Maximum of {MAX_TERMS} terms per search.
</p>
)}
<p className="text-xs text-slate-500">
API base: <span className="font-medium">{API_BASE_URL}</span>
</p>
</form>
<section className="space-y-4">
<div className="flex items-center justify-between">
<div className="flex flex-wrap items-center justify-between gap-2">
<h2 className="text-xl font-semibold text-slate-800">Results</h2>
<span className="text-sm text-slate-500">
{results.length} {results.length === 1 ? "source" : "sources"}
{summary.definitions} definitions · {summary.taxonomy} taxonomy
matches · {summary.failed} failed term
{summary.failed === 1 ? "" : "s"}
</span>
</div>
@@ -123,79 +208,121 @@ export default function App() {
</div>
) : null}
{results.length === 0 && !loading ? (
{orderedTerms.length === 0 && !loading ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
No definitions yet. Try searching for a term.
No definitions yet. Add terms and search.
</div>
) : null}
<div className="space-y-3">
{results.map((result) => (
<article
key={`${result.source}-${result.title}`}
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
>
<h3 className="text-sm font-semibold uppercase tracking-wide text-sky-600">
{result.source}
</h3>
<p className="mt-2 text-lg font-semibold text-slate-900">
{result.title}
</p>
<a
href={result.url}
target="_blank"
rel="noreferrer"
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
<div className="space-y-6">
{orderedTerms.map((term) => {
const item = resultsByTerm[term];
const definitions = item?.results ?? [];
const taxonomy = item?.taxonomy ?? [];
const termError = item?.error;
return (
<section
key={term}
className="space-y-4 rounded-2xl border border-slate-100 bg-white p-5 shadow-sm"
>
View source
</a>
<p className="mt-2 text-base text-slate-700">
{result.definition}
</p>
</article>
))}
</div>
</section>
<section className="space-y-4">
<div className="flex items-center justify-between">
<h2 className="text-xl font-semibold text-slate-800">
ISO 14224 Taxonomy
</h2>
<span className="text-sm text-slate-500">
{taxonomy.length} {taxonomy.length === 1 ? "match" : "matches"}
</span>
</div>
{taxonomy.length === 0 && !loading ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
No taxonomy matches found.
</div>
) : null}
<div className="space-y-3">
{taxonomy.map((item) => (
<article
key={`${item.class_code}-${item.type_code ?? "class"}`}
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
>
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
{item.category}
</p>
<h3 className="mt-2 text-lg font-semibold text-slate-900">
{item.full_name}
</h3>
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
<span>Class: {item.class_name} ({item.class_code})</span>
{item.type_description ? (
<span>
Type: {item.type_description} ({item.type_code})
<div className="flex flex-wrap items-center justify-between gap-2">
<h3 className="text-lg font-semibold text-slate-900">
{term}
</h3>
<span className="text-xs text-slate-500">
{definitions.length} source
{definitions.length === 1 ? "" : "s"} · {taxonomy.length}
{taxonomy.length === 1 ? " match" : " matches"}
</span>
</div>
{termError ? (
<div className="rounded-xl border border-rose-200 bg-rose-50 p-4 text-sm text-rose-600">
{termError}
</div>
) : null}
{item.annex ? <span>Annex: {item.annex}</span> : null}
</div>
</article>
))}
<div className="space-y-3">
<h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
Definitions
</h4>
{definitions.length === 0 ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
No definitions found for this term.
</div>
) : (
<div className="space-y-3">
{definitions.map((result) => (
<article
key={`${term}-${result.source}-${result.title}`}
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
>
<h5 className="text-xs font-semibold uppercase tracking-wide text-sky-600">
{result.source}
</h5>
<p className="mt-2 text-base font-semibold text-slate-900">
{result.title}
</p>
<a
href={result.url}
target="_blank"
rel="noreferrer"
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
>
View source
</a>
<p className="mt-2 text-sm text-slate-700">
{result.definition}
</p>
</article>
))}
</div>
)}
</div>
<div className="space-y-3">
<h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
ISO 14224 Taxonomy
</h4>
{taxonomy.length === 0 ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
No taxonomy matches found for this term.
</div>
) : (
<div className="space-y-3">
{taxonomy.map((item) => (
<article
key={`${term}-${item.class_code}-${
item.type_code ?? "class"
}`}
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
>
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
{item.category}
</p>
<h5 className="mt-2 text-base font-semibold text-slate-900">
{item.full_name}
</h5>
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
<span>
Class: {item.class_name} ({item.class_code})
</span>
{item.type_description ? (
<span>
Type: {item.type_description} ({item.type_code})
</span>
) : null}
{item.annex ? <span>Annex: {item.annex}</span> : null}
</div>
</article>
))}
</div>
)}
</div>
</section>
);
})}
</div>
</section>
</div>