added multi-term search

This commit is contained in:
gulimabr
2026-01-28 16:27:34 -03:00
parent 1461168af6
commit 5abcad4c43
2 changed files with 327 additions and 92 deletions

View File

@@ -14,7 +14,7 @@ import httpx
import structlog import structlog
from asgi_correlation_id import CorrelationIdMiddleware from asgi_correlation_id import CorrelationIdMiddleware
from asgi_correlation_id.context import correlation_id from asgi_correlation_id.context import correlation_id
from fastapi import FastAPI, Query, Response from fastapi import FastAPI, HTTPException, Query, Response
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from selectolax.parser import HTMLParser from selectolax.parser import HTMLParser
@@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel):
taxonomy: List[TaxonomyMatch] = Field(default_factory=list) taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
# Per-term counters attached to every BulkTermResult.
class BulkTermMeta(BaseModel):
# Number of definitions kept for the term (length of the result's `results`).
definitions_count: int
# Number of taxonomy matches for the term (length of the result's `taxonomy`).
taxonomy_count: int
# Outcome of looking up a single term in a bulk request.
class BulkTermResult(BaseModel):
# The term this result belongs to; also used as the key in the bulk response map.
term: str
# Definitions returned by the scrapers; empty when none were found or the lookup failed.
results: List[Definition] = Field(default_factory=list)
# Taxonomy matches for the term; empty when none were found or the lookup failed.
taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
# Counts matching the two lists above (both zero when the lookup failed).
meta: BulkTermMeta
# None on success; a generic user-facing message when the lookup raised.
error: Optional[str] = None
# Request body of POST /api/definitions/bulk.
class BulkDefinitionRequest(BaseModel):
# Raw terms as sent by the client; the endpoint strips and de-duplicates them
# before enforcing the term limit.
terms: List[str]
# Response body of POST /api/definitions/bulk.
class BulkDefinitionResponse(BaseModel):
# Normalized terms (stripped, unique, non-empty) in the order they were first seen.
terms: List[str]
# Per-term outcome, keyed by each result's `term`.
results: dict[str, BulkTermResult]
# Correlation id of the request as read from `correlation_id`; may be None.
request_id: Optional[str] = None
app = FastAPI(title="TermSearch API", version="0.1.0") app = FastAPI(title="TermSearch API", version="0.1.0")
logging.basicConfig(format="%(message)s", level=logging.INFO) logging.basicConfig(format="%(message)s", level=logging.INFO)
@@ -79,6 +102,21 @@ def normalize_text(text: str) -> str:
return " ".join(text.lower().split()) return " ".join(text.lower().split())
# Maximum number of normalized (stripped, unique, non-empty) terms accepted by
# one bulk request; get_definitions_bulk answers HTTP 400 when it is exceeded.
MAX_BULK_TERMS = 5
def normalize_terms(raw_terms: List[str]) -> List[str]:
    """Strip every raw term and keep the unique, non-empty ones.

    Args:
        raw_terms: Terms exactly as received from the client.

    Returns:
        The stripped terms in first-seen order. Entries that are empty after
        stripping and exact (case-sensitive) repeats are dropped.
    """
    stripped = (candidate.strip() for candidate in raw_terms)
    # dict keys keep insertion order, so this removes repeats while preserving
    # the position at which each term first appeared.
    return list(dict.fromkeys(term for term in stripped if term))
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def load_taxonomy() -> dict: def load_taxonomy() -> dict:
root_dir = Path(__file__).resolve().parents[1] root_dir = Path(__file__).resolve().parents[1]
@@ -486,3 +524,73 @@ async def get_definitions(
request_id=request_id, request_id=request_id,
taxonomy=taxonomy, taxonomy=taxonomy,
) )
async def build_bulk_term_result(term: str) -> BulkTermResult:
    """Look up one term across every source and package the outcome.

    The three scrapers are awaited concurrently and their falsy results are
    discarded; taxonomy matches are looked up afterwards. If anything in that
    process raises an ``Exception``, it is logged with the request id and
    turned into an empty result carrying a generic error message, so a single
    failing term does not fail the whole bulk request.

    Args:
        term: The already-normalized term to look up.

    Returns:
        A ``BulkTermResult`` for ``term``; its ``error`` is set only when the
        lookup failed.
    """
    try:
        scraped = await asyncio.gather(
            scrape_dicionario_first(term),
            scrape_slb_first(term),
            scrape_merriam_first(term),
        )
        # Keep only the sources that actually produced something.
        definitions = [hit for hit in scraped if hit]
        matches = find_taxonomy_matches(term)
        return BulkTermResult(
            term=term,
            results=definitions,
            taxonomy=matches,
            meta=BulkTermMeta(
                definitions_count=len(definitions),
                taxonomy_count=len(matches),
            ),
        )
    except Exception as exc:  # noqa: BLE001
        logger.exception(
            "bulk_term_failed",
            term=term,
            request_id=correlation_id.get(),
            error=str(exc),
        )
        # Report the failure in-band so the other terms can still be returned.
        return BulkTermResult(
            term=term,
            results=[],
            taxonomy=[],
            meta=BulkTermMeta(definitions_count=0, taxonomy_count=0),
            error="Failed to fetch definitions for this term.",
        )
@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse)
async def get_definitions_bulk(
    payload: BulkDefinitionRequest,
    response: Response,
) -> BulkDefinitionResponse:
    """Resolve definitions and taxonomy matches for several terms at once.

    The submitted terms are stripped and de-duplicated first, and the limit is
    applied to that normalized list. All remaining terms are looked up
    concurrently; per-term failures are reported inside each result rather
    than failing the request.

    Args:
        payload: Request body holding the raw terms.
        response: Outgoing response; receives the ``X-Request-ID`` header when
            a correlation id is available.

    Returns:
        The normalized terms, a map from term to its result, and the request
        id. An empty term list yields an empty map.

    Raises:
        HTTPException: 400 when more than ``MAX_BULK_TERMS`` terms remain
            after normalization.
    """
    request_id = correlation_id.get()
    if request_id:
        response.headers["X-Request-ID"] = request_id

    terms = normalize_terms(payload.terms)
    term_count = len(terms)
    if term_count > MAX_BULK_TERMS:
        raise HTTPException(
            status_code=400,
            detail=f"Maximum of {MAX_BULK_TERMS} terms allowed.",
        )

    results_map: dict[str, BulkTermResult] = {}
    if terms:
        lookups = [build_bulk_term_result(term) for term in terms]
        for entry in await asyncio.gather(*lookups):
            results_map[entry.term] = entry

    return BulkDefinitionResponse(
        terms=terms,
        results=results_map,
        request_id=request_id,
    )

View File

@@ -7,10 +7,23 @@ type Definition = {
definition: string; definition: string;
}; };
type DefinitionResponse = { type BulkTermMeta = {
definitions_count: number;
taxonomy_count: number;
};
type BulkTermResult = {
term: string; term: string;
results: Definition[]; results: Definition[];
taxonomy?: TaxonomyMatch[]; taxonomy?: TaxonomyMatch[];
meta: BulkTermMeta;
error?: string | null;
};
type BulkDefinitionResponse = {
terms: string[];
results: Record<string, BulkTermResult>;
request_id?: string | null;
}; };
type TaxonomyMatch = { type TaxonomyMatch = {
@@ -26,20 +39,52 @@ type TaxonomyMatch = {
const API_BASE_URL = const API_BASE_URL =
import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000"; import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000";
const MAX_TERMS = 5;
export default function App() { export default function App() {
const [term, setTerm] = useState(""); const [termInput, setTermInput] = useState("");
const [results, setResults] = useState<Definition[]>([]); const [resultsByTerm, setResultsByTerm] = useState<
const [taxonomy, setTaxonomy] = useState<TaxonomyMatch[]>([]); Record<string, BulkTermResult>
>({});
const [orderedTerms, setOrderedTerms] = useState<string[]>([]);
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const canSearch = term.trim().length > 0 && !loading; const parsedTerms = useMemo(() => {
const terms: string[] = [];
const seen = new Set<string>();
for (const line of termInput.split(/\r?\n/)) {
const trimmed = line.trim();
if (!trimmed || seen.has(trimmed)) continue;
terms.push(trimmed);
seen.add(trimmed);
}
return terms;
}, [termInput]);
const hasTooManyTerms = parsedTerms.length > MAX_TERMS;
const canSearch = parsedTerms.length > 0 && !hasTooManyTerms && !loading;
const apiUrl = useMemo(() => { const apiUrl = useMemo(() => {
const url = new URL("/api/definitions", API_BASE_URL); const url = new URL("/api/definitions/bulk", API_BASE_URL);
url.searchParams.set("term", term.trim());
return url.toString(); return url.toString();
}, [term]); }, [API_BASE_URL]);
const summary = useMemo(() => {
let definitions = 0;
let taxonomy = 0;
let failed = 0;
for (const term of orderedTerms) {
const item = resultsByTerm[term];
if (!item) continue;
if (item.error) failed += 1;
definitions += item.meta?.definitions_count ?? item.results?.length ?? 0;
taxonomy += item.meta?.taxonomy_count ?? item.taxonomy?.length ?? 0;
}
return { definitions, taxonomy, failed };
}, [orderedTerms, resultsByTerm]);
const handleSearch = async (event: React.FormEvent) => { const handleSearch = async (event: React.FormEvent) => {
event.preventDefault(); event.preventDefault();
@@ -50,15 +95,32 @@ export default function App() {
setError(null); setError(null);
try { try {
const response = await fetch(apiUrl); const response = await fetch(apiUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ terms: parsedTerms }),
});
if (!response.ok) { if (!response.ok) {
throw new Error("Failed to fetch definitions."); let message = "Failed to fetch definitions.";
try {
const payload = (await response.json()) as { detail?: string };
if (payload?.detail) {
message = payload.detail;
}
} catch {
// ignore JSON parsing errors
}
throw new Error(message);
} }
const data = (await response.json()) as DefinitionResponse; const data = (await response.json()) as BulkDefinitionResponse;
setResults(data.results ?? []); setOrderedTerms(data.terms ?? parsedTerms);
setTaxonomy(data.taxonomy ?? []); setResultsByTerm(data.results ?? {});
} catch (err) { } catch (err) {
setError(err instanceof Error ? err.message : "Something went wrong."); setError(err instanceof Error ? err.message : "Something went wrong.");
setOrderedTerms([]);
setResultsByTerm({});
} finally { } finally {
setLoading(false); setLoading(false);
} }
@@ -83,17 +145,28 @@ export default function App() {
onSubmit={handleSearch} onSubmit={handleSearch}
className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm" className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm"
> >
<label className="text-sm font-medium text-slate-700" htmlFor="term"> <div className="flex items-center justify-between">
Search term <label className="text-sm font-medium text-slate-700" htmlFor="terms">
</label> Search terms (one per line)
</label>
<span
className={`text-xs ${
hasTooManyTerms ? "text-rose-600" : "text-slate-500"
}`}
>
{parsedTerms.length}/{MAX_TERMS} terms
</span>
</div>
<div className="flex flex-col gap-3 sm:flex-row"> <div className="flex flex-col gap-3 sm:flex-row">
<input <textarea
id="term" id="terms"
name="term" name="terms"
type="text" rows={4}
value={term} value={termInput}
onChange={(event) => setTerm(event.target.value)} onChange={(
placeholder="Ex: gas lift" event: React.ChangeEvent<HTMLTextAreaElement>
) => setTermInput(event.target.value)}
placeholder={`Ex:\ngas lift\npump\nflow assurance`}
className="flex-1 rounded-xl border border-slate-200 px-4 py-3 text-base focus:border-sky-500 focus:outline-none focus:ring-2 focus:ring-sky-200" className="flex-1 rounded-xl border border-slate-200 px-4 py-3 text-base focus:border-sky-500 focus:outline-none focus:ring-2 focus:ring-sky-200"
/> />
<button <button
@@ -104,16 +177,28 @@ export default function App() {
{loading ? "Searching..." : "Search"} {loading ? "Searching..." : "Search"}
</button> </button>
</div> </div>
{hasTooManyTerms ? (
<p className="text-xs text-rose-600">
Maximum of {MAX_TERMS} terms allowed. Please remove extra terms to
continue.
</p>
) : (
<p className="text-xs text-slate-500">
Maximum of {MAX_TERMS} terms per search.
</p>
)}
<p className="text-xs text-slate-500"> <p className="text-xs text-slate-500">
API base: <span className="font-medium">{API_BASE_URL}</span> API base: <span className="font-medium">{API_BASE_URL}</span>
</p> </p>
</form> </form>
<section className="space-y-4"> <section className="space-y-4">
<div className="flex items-center justify-between"> <div className="flex flex-wrap items-center justify-between gap-2">
<h2 className="text-xl font-semibold text-slate-800">Results</h2> <h2 className="text-xl font-semibold text-slate-800">Results</h2>
<span className="text-sm text-slate-500"> <span className="text-sm text-slate-500">
{results.length} {results.length === 1 ? "source" : "sources"} {summary.definitions} definitions · {summary.taxonomy} taxonomy
matches · {summary.failed} failed term
{summary.failed === 1 ? "" : "s"}
</span> </span>
</div> </div>
@@ -123,79 +208,121 @@ export default function App() {
</div> </div>
) : null} ) : null}
{results.length === 0 && !loading ? ( {orderedTerms.length === 0 && !loading ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500"> <div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
No definitions yet. Try searching for a term. No definitions yet. Add terms and search.
</div> </div>
) : null} ) : null}
<div className="space-y-3"> <div className="space-y-6">
{results.map((result) => ( {orderedTerms.map((term) => {
<article const item = resultsByTerm[term];
key={`${result.source}-${result.title}`} const definitions = item?.results ?? [];
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm" const taxonomy = item?.taxonomy ?? [];
> const termError = item?.error;
<h3 className="text-sm font-semibold uppercase tracking-wide text-sky-600">
{result.source} return (
</h3> <section
<p className="mt-2 text-lg font-semibold text-slate-900"> key={term}
{result.title} className="space-y-4 rounded-2xl border border-slate-100 bg-white p-5 shadow-sm"
</p>
<a
href={result.url}
target="_blank"
rel="noreferrer"
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
> >
View source <div className="flex flex-wrap items-center justify-between gap-2">
</a> <h3 className="text-lg font-semibold text-slate-900">
<p className="mt-2 text-base text-slate-700"> {term}
{result.definition} </h3>
</p> <span className="text-xs text-slate-500">
</article> {definitions.length} source
))} {definitions.length === 1 ? "" : "s"} · {taxonomy.length}
</div> {taxonomy.length === 1 ? " match" : " matches"}
</section>
<section className="space-y-4">
<div className="flex items-center justify-between">
<h2 className="text-xl font-semibold text-slate-800">
ISO 14224 Taxonomy
</h2>
<span className="text-sm text-slate-500">
{taxonomy.length} {taxonomy.length === 1 ? "match" : "matches"}
</span>
</div>
{taxonomy.length === 0 && !loading ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
No taxonomy matches found.
</div>
) : null}
<div className="space-y-3">
{taxonomy.map((item) => (
<article
key={`${item.class_code}-${item.type_code ?? "class"}`}
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
>
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
{item.category}
</p>
<h3 className="mt-2 text-lg font-semibold text-slate-900">
{item.full_name}
</h3>
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
<span>Class: {item.class_name} ({item.class_code})</span>
{item.type_description ? (
<span>
Type: {item.type_description} ({item.type_code})
</span> </span>
</div>
{termError ? (
<div className="rounded-xl border border-rose-200 bg-rose-50 p-4 text-sm text-rose-600">
{termError}
</div>
) : null} ) : null}
{item.annex ? <span>Annex: {item.annex}</span> : null}
</div> <div className="space-y-3">
</article> <h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
))} Definitions
</h4>
{definitions.length === 0 ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
No definitions found for this term.
</div>
) : (
<div className="space-y-3">
{definitions.map((result) => (
<article
key={`${term}-${result.source}-${result.title}`}
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
>
<h5 className="text-xs font-semibold uppercase tracking-wide text-sky-600">
{result.source}
</h5>
<p className="mt-2 text-base font-semibold text-slate-900">
{result.title}
</p>
<a
href={result.url}
target="_blank"
rel="noreferrer"
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
>
View source
</a>
<p className="mt-2 text-sm text-slate-700">
{result.definition}
</p>
</article>
))}
</div>
)}
</div>
<div className="space-y-3">
<h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
ISO 14224 Taxonomy
</h4>
{taxonomy.length === 0 ? (
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
No taxonomy matches found for this term.
</div>
) : (
<div className="space-y-3">
{taxonomy.map((item) => (
<article
key={`${term}-${item.class_code}-${
item.type_code ?? "class"
}`}
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
>
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
{item.category}
</p>
<h5 className="mt-2 text-base font-semibold text-slate-900">
{item.full_name}
</h5>
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
<span>
Class: {item.class_name} ({item.class_code})
</span>
{item.type_description ? (
<span>
Type: {item.type_description} ({item.type_code})
</span>
) : null}
{item.annex ? <span>Annex: {item.annex}</span> : null}
</div>
</article>
))}
</div>
)}
</div>
</section>
);
})}
</div> </div>
</section> </section>
</div> </div>