added multi-term search

This commit is contained in:
gulimabr
2026-01-28 16:27:34 -03:00
parent 1461168af6
commit 5abcad4c43
2 changed files with 327 additions and 92 deletions

View File

@@ -14,7 +14,7 @@ import httpx
import structlog
from asgi_correlation_id import CorrelationIdMiddleware
from asgi_correlation_id.context import correlation_id
from fastapi import FastAPI, Query, Response
from fastapi import FastAPI, HTTPException, Query, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from selectolax.parser import HTMLParser
@@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel):
taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
class BulkTermMeta(BaseModel):
    # Per-term counters reported alongside a bulk lookup result.
    definitions_count: int  # number of definitions returned for the term
    taxonomy_count: int  # number of taxonomy matches returned for the term
class BulkTermResult(BaseModel):
    # Outcome of looking up a single term as part of a bulk request.
    term: str  # the term this entry describes
    results: List[Definition] = Field(default_factory=list)
    taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
    meta: BulkTermMeta  # sizes of the two lists above
    error: Optional[str] = None  # message when the lookup failed, else None
class BulkDefinitionRequest(BaseModel):
    # Request body of the bulk definitions endpoint.
    terms: List[str]  # raw terms exactly as sent by the client
class BulkDefinitionResponse(BaseModel):
    # Response body of the bulk definitions endpoint.
    terms: List[str]  # the terms that were looked up
    results: dict[str, BulkTermResult]  # per-term outcome, keyed by term
    request_id: Optional[str] = None  # correlation id of the request, if any
# FastAPI application object; route handlers are attached to it by decorator.
app = FastAPI(title="TermSearch API", version="0.1.0")
# Standard-library logging prints only the message text, at INFO and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)
@@ -79,6 +102,21 @@ def normalize_text(text: str) -> str:
return " ".join(text.lower().split())
# Largest number of distinct, non-blank terms a single bulk request may
# contain; the limit is checked after the terms have been normalized.
MAX_BULK_TERMS = 5
def normalize_terms(raw_terms: List[str]) -> List[str]:
    """Return the usable terms from *raw_terms*, in first-seen order.

    Each entry has its surrounding whitespace stripped. Entries that are
    empty after stripping are dropped, as are repeats of a term that has
    already been kept. Comparison is exact (case-sensitive).
    """
    stripped = (candidate.strip() for candidate in raw_terms)
    # dict keys keep insertion order, so this de-duplicates while preserving
    # the position of each term's first occurrence.
    unique_in_order = dict.fromkeys(cleaned for cleaned in stripped if cleaned)
    return list(unique_in_order)
@lru_cache(maxsize=1)
def load_taxonomy() -> dict:
root_dir = Path(__file__).resolve().parents[1]
@@ -486,3 +524,73 @@ async def get_definitions(
request_id=request_id,
taxonomy=taxonomy,
)
async def build_bulk_term_result(term: str) -> BulkTermResult:
    """Build the bulk-response entry for a single *term*.

    The three scraper coroutines are awaited together and their falsy
    results are discarded; taxonomy matches are then looked up for the same
    term. Any ``Exception`` raised along the way is logged and turned into
    an empty entry carrying an error message, so this coroutine reports a
    failure in its return value instead of raising it.
    """
    try:
        scraped = await asyncio.gather(
            scrape_dicionario_first(term),
            scrape_slb_first(term),
            scrape_merriam_first(term),
        )
        # Keep only the sources that actually produced something.
        definitions = list(filter(None, scraped))
        matches = find_taxonomy_matches(term)
        return BulkTermResult(
            term=term,
            results=definitions,
            taxonomy=matches,
            meta=BulkTermMeta(
                definitions_count=len(definitions),
                taxonomy_count=len(matches),
            ),
        )
    except Exception as failure:  # noqa: BLE001
        # Deliberately broad: one bad term must not fail the whole batch.
        logger.exception(
            "bulk_term_failed",
            term=term,
            request_id=correlation_id.get(),
            error=str(failure),
        )
        empty_counts = BulkTermMeta(definitions_count=0, taxonomy_count=0)
        return BulkTermResult(
            term=term,
            results=[],
            taxonomy=[],
            meta=empty_counts,
            error="Failed to fetch definitions for this term.",
        )
@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse)
async def get_definitions_bulk(
    payload: BulkDefinitionRequest,
    response: Response,
) -> BulkDefinitionResponse:
    # Bulk lookup: normalizes the submitted terms, rejects the request with
    # HTTP 400 when more than MAX_BULK_TERMS remain, and otherwise resolves
    # every term concurrently. (Comments are used instead of a docstring so
    # the published route description stays as it is.)

    # Echo the correlation id back to the client when one is set.
    request_id = correlation_id.get()
    if request_id:
        response.headers["X-Request-ID"] = request_id

    terms = normalize_terms(payload.terms)
    if len(terms) > MAX_BULK_TERMS:
        raise HTTPException(
            status_code=400,
            detail=f"Maximum of {MAX_BULK_TERMS} terms allowed.",
        )

    # With no usable terms the map simply stays empty and nothing is fetched.
    results_map: dict[str, BulkTermResult] = {}
    if terms:
        lookups = [build_bulk_term_result(term) for term in terms]
        for entry in await asyncio.gather(*lookups):
            results_map[entry.term] = entry

    return BulkDefinitionResponse(
        terms=terms,
        results=results_map,
        request_id=request_id,
    )