From 5abcad4c43a6e58c24941fa637540c40a8d9426f Mon Sep 17 00:00:00 2001 From: gulimabr Date: Wed, 28 Jan 2026 16:27:34 -0300 Subject: [PATCH] added multi-term search --- backend/app/main.py | 110 ++++++++++++++- frontend/src/App.tsx | 309 ++++++++++++++++++++++++++++++------------- 2 files changed, 327 insertions(+), 92 deletions(-) diff --git a/backend/app/main.py b/backend/app/main.py index 36c5338..5dc08c4 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -14,7 +14,7 @@ import httpx import structlog from asgi_correlation_id import CorrelationIdMiddleware from asgi_correlation_id.context import correlation_id -from fastapi import FastAPI, Query, Response +from fastapi import FastAPI, HTTPException, Query, Response from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field from selectolax.parser import HTMLParser @@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel): taxonomy: List[TaxonomyMatch] = Field(default_factory=list) +class BulkTermMeta(BaseModel): + definitions_count: int + taxonomy_count: int + + +class BulkTermResult(BaseModel): + term: str + results: List[Definition] = Field(default_factory=list) + taxonomy: List[TaxonomyMatch] = Field(default_factory=list) + meta: BulkTermMeta + error: Optional[str] = None + + +class BulkDefinitionRequest(BaseModel): + terms: List[str] + + +class BulkDefinitionResponse(BaseModel): + terms: List[str] + results: dict[str, BulkTermResult] + request_id: Optional[str] = None + + app = FastAPI(title="TermSearch API", version="0.1.0") logging.basicConfig(format="%(message)s", level=logging.INFO) @@ -79,6 +102,21 @@ def normalize_text(text: str) -> str: return " ".join(text.lower().split()) +MAX_BULK_TERMS = 5 + + +def normalize_terms(raw_terms: List[str]) -> List[str]: + normalized: List[str] = [] + seen: set[str] = set() + for raw in raw_terms: + term = raw.strip() + if not term or term in seen: + continue + normalized.append(term) + seen.add(term) + return normalized + + @lru_cache(maxsize=1) def load_taxonomy() -> dict: root_dir = Path(__file__).resolve().parents[1] @@ -486,3 +524,73 @@ async def get_definitions( request_id=request_id, taxonomy=taxonomy, ) + + +async def build_bulk_term_result(term: str) -> BulkTermResult: + try: + results = [ + result + for result in await asyncio.gather( + scrape_dicionario_first(term), + scrape_slb_first(term), + scrape_merriam_first(term), + ) + if result + ] + taxonomy = find_taxonomy_matches(term) + meta = BulkTermMeta( + definitions_count=len(results), + taxonomy_count=len(taxonomy), + ) + return BulkTermResult( + term=term, + results=results, + taxonomy=taxonomy, + meta=meta, + ) + except Exception as exc: # noqa: BLE001 + logger.exception( + "bulk_term_failed", + term=term, + request_id=correlation_id.get(), + error=str(exc), + ) + meta = BulkTermMeta(definitions_count=0, taxonomy_count=0) + return BulkTermResult( + term=term, + results=[], + taxonomy=[], + meta=meta, + error="Failed to fetch definitions for this term.", + ) + + +@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse) +async def get_definitions_bulk( + payload: BulkDefinitionRequest, + response: Response, +) -> BulkDefinitionResponse: + request_id = correlation_id.get() + if request_id: + response.headers["X-Request-ID"] = request_id + + terms = normalize_terms(payload.terms) + if len(terms) > MAX_BULK_TERMS: + raise HTTPException( + status_code=400, + detail=f"Maximum of {MAX_BULK_TERMS} terms allowed.", + ) + + if not terms: + return BulkDefinitionResponse(terms=[], results={}, request_id=request_id) + + term_results = await asyncio.gather( + *(build_bulk_term_result(term) for term in terms) + ) + results_map = {item.term: item for item in term_results} + + return BulkDefinitionResponse( + terms=terms, + results=results_map, + request_id=request_id, + ) diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index eba70b0..fc937fe 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -7,10 +7,23 @@ type Definition = { definition: string; }; -type DefinitionResponse = { +type BulkTermMeta = { + definitions_count: number; + taxonomy_count: number; +}; + +type BulkTermResult = { term: string; results: Definition[]; taxonomy?: TaxonomyMatch[]; + meta: BulkTermMeta; + error?: string | null; +}; + +type BulkDefinitionResponse = { + terms: string[]; + results: Record; + request_id?: string | null; }; type TaxonomyMatch = { @@ -26,20 +39,52 @@ type TaxonomyMatch = { const API_BASE_URL = import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000"; +const MAX_TERMS = 5; + export default function App() { - const [term, setTerm] = useState(""); - const [results, setResults] = useState([]); - const [taxonomy, setTaxonomy] = useState([]); + const [termInput, setTermInput] = useState(""); + const [resultsByTerm, setResultsByTerm] = useState< + Record + >({}); + const [orderedTerms, setOrderedTerms] = useState([]); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); - const canSearch = term.trim().length > 0 && !loading; + const parsedTerms = useMemo(() => { + const terms: string[] = []; + const seen = new Set(); + for (const line of termInput.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || seen.has(trimmed)) continue; + terms.push(trimmed); + seen.add(trimmed); + } + return terms; + }, [termInput]); + + const hasTooManyTerms = parsedTerms.length > MAX_TERMS; + const canSearch = parsedTerms.length > 0 && !hasTooManyTerms && !loading; const apiUrl = useMemo(() => { - const url = new URL("/api/definitions", API_BASE_URL); - url.searchParams.set("term", term.trim()); + const url = new URL("/api/definitions/bulk", API_BASE_URL); return url.toString(); - }, [term]); + }, [API_BASE_URL]); + + const summary = useMemo(() => { + let definitions = 0; + let taxonomy = 0; + let failed = 0; + + for (const term of orderedTerms) { + const item = resultsByTerm[term]; + if (!item) continue; + if (item.error) failed += 1; + definitions += item.meta?.definitions_count ?? item.results?.length ?? 0; + taxonomy += item.meta?.taxonomy_count ?? item.taxonomy?.length ?? 0; + } + + return { definitions, taxonomy, failed }; + }, [orderedTerms, resultsByTerm]); const handleSearch = async (event: React.FormEvent) => { event.preventDefault(); @@ -50,15 +95,32 @@ export default function App() { setError(null); try { - const response = await fetch(apiUrl); + const response = await fetch(apiUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ terms: parsedTerms }), + }); if (!response.ok) { - throw new Error("Failed to fetch definitions."); + let message = "Failed to fetch definitions."; + try { + const payload = (await response.json()) as { detail?: string }; + if (payload?.detail) { + message = payload.detail; + } + } catch { + // ignore JSON parsing errors + } + throw new Error(message); } - const data = (await response.json()) as DefinitionResponse; - setResults(data.results ?? []); - setTaxonomy(data.taxonomy ?? []); + const data = (await response.json()) as BulkDefinitionResponse; + setOrderedTerms(data.terms ?? parsedTerms); + setResultsByTerm(data.results ?? {}); } catch (err) { setError(err instanceof Error ? err.message : "Something went wrong."); + setOrderedTerms([]); + setResultsByTerm({}); } finally { setLoading(false); } @@ -83,17 +145,28 @@ export default function App() { onSubmit={handleSearch} className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm" > - +
+ + + {parsedTerms.length}/{MAX_TERMS} terms + +
- setTerm(event.target.value)} - placeholder="Ex: gas lift" +