added multi-term search
This commit is contained in:
@@ -14,7 +14,7 @@ import httpx
|
||||
import structlog
|
||||
from asgi_correlation_id import CorrelationIdMiddleware
|
||||
from asgi_correlation_id.context import correlation_id
|
||||
from fastapi import FastAPI, Query, Response
|
||||
from fastapi import FastAPI, HTTPException, Query, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from selectolax.parser import HTMLParser
|
||||
@@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel):
|
||||
taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
|
||||
|
||||
|
||||
class BulkTermMeta(BaseModel):
    """Per-term counters attached to each entry of a bulk lookup."""

    # Number of definitions kept for the term.
    definitions_count: int
    # Number of taxonomy matches found for the term.
    taxonomy_count: int
|
||||
|
||||
|
||||
class BulkTermResult(BaseModel):
    """Outcome of looking up a single term as part of a bulk request.

    ``error`` stays ``None`` on success; on failure it carries a
    human-readable message while ``results`` and ``taxonomy`` are empty.
    """

    # The term exactly as it was looked up.
    term: str
    # Definitions collected for the term.
    results: List[Definition] = Field(default_factory=list)
    # Taxonomy entries matching the term.
    taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
    # Counters summarising ``results`` and ``taxonomy``.
    meta: BulkTermMeta
    # Failure message, if the lookup for this term failed.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class BulkDefinitionRequest(BaseModel):
    """Request body of the bulk definitions endpoint."""

    # Raw terms as submitted by the client; cleaned up by the endpoint.
    terms: List[str]
|
||||
|
||||
|
||||
class BulkDefinitionResponse(BaseModel):
    """Response body of the bulk definitions endpoint."""

    # Terms that were actually looked up, in request order.
    terms: List[str]
    # Per-term outcome, keyed by the term string.
    results: dict[str, BulkTermResult]
    # Correlation id of the request, when one is available.
    request_id: Optional[str] = None
|
||||
|
||||
|
||||
app = FastAPI(title="TermSearch API", version="0.1.0")
|
||||
|
||||
logging.basicConfig(format="%(message)s", level=logging.INFO)
|
||||
@@ -79,6 +102,21 @@ def normalize_text(text: str) -> str:
|
||||
return " ".join(text.lower().split())
|
||||
|
||||
|
||||
# Upper bound on the number of distinct, non-empty terms accepted by a single
# bulk definitions request; larger requests are rejected with HTTP 400.
MAX_BULK_TERMS = 5
|
||||
|
||||
|
||||
def normalize_terms(raw_terms: List[str]) -> List[str]:
    """Strip, de-duplicate and drop empty entries from *raw_terms*.

    Surrounding whitespace is removed from every entry. Entries that are
    empty after stripping are discarded, and only the first occurrence of
    each remaining term is kept, so the result preserves input order.
    Comparison is exact (case-sensitive).
    """
    stripped = (candidate.strip() for candidate in raw_terms)
    # dict preserves insertion order, giving an ordered "first seen" dedup.
    return list(dict.fromkeys(cleaned for cleaned in stripped if cleaned))
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def load_taxonomy() -> dict:
|
||||
root_dir = Path(__file__).resolve().parents[1]
|
||||
@@ -486,3 +524,73 @@ async def get_definitions(
|
||||
request_id=request_id,
|
||||
taxonomy=taxonomy,
|
||||
)
|
||||
|
||||
|
||||
async def build_bulk_term_result(term: str) -> BulkTermResult:
    """Look up one term for the bulk endpoint without ever raising ``Exception``.

    The three scrapers are awaited together via ``asyncio.gather`` and their
    falsy results are dropped; taxonomy matches are then computed for the
    term. If anything in that process raises an ``Exception``, the failure
    is logged and an empty result carrying an ``error`` message is returned
    instead, so one bad term does not fail the whole bulk request.
    """
    try:
        scraped = await asyncio.gather(
            scrape_dicionario_first(term),
            scrape_slb_first(term),
            scrape_merriam_first(term),
        )
        # Keep only the sources that produced something truthy.
        definitions = [entry for entry in scraped if entry]
        matches = find_taxonomy_matches(term)
        return BulkTermResult(
            term=term,
            results=definitions,
            taxonomy=matches,
            meta=BulkTermMeta(
                definitions_count=len(definitions),
                taxonomy_count=len(matches),
            ),
        )
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: failures are reported per term, not raised.
        logger.exception(
            "bulk_term_failed",
            term=term,
            request_id=correlation_id.get(),
            error=str(exc),
        )
        return BulkTermResult(
            term=term,
            results=[],
            taxonomy=[],
            meta=BulkTermMeta(definitions_count=0, taxonomy_count=0),
            error="Failed to fetch definitions for this term.",
        )
|
||||
|
||||
|
||||
@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse)
async def get_definitions_bulk(
    payload: BulkDefinitionRequest,
    response: Response,
) -> BulkDefinitionResponse:
    """Resolve definitions and taxonomy matches for several terms at once.

    The submitted terms are cleaned with ``normalize_terms`` first. More
    than ``MAX_BULK_TERMS`` remaining terms is rejected with HTTP 400; an
    empty list yields an empty response. Otherwise every term is looked up
    concurrently and the outcomes are returned keyed by term.

    Raises:
        HTTPException: status 400 when too many terms remain after cleanup.
    """
    request_id = correlation_id.get()
    if request_id:
        # Echo the correlation id so clients can match logs to responses.
        response.headers["X-Request-ID"] = request_id

    terms = normalize_terms(payload.terms)

    if len(terms) > MAX_BULK_TERMS:
        raise HTTPException(
            status_code=400,
            detail=f"Maximum of {MAX_BULK_TERMS} terms allowed.",
        )

    if not terms:
        return BulkDefinitionResponse(terms=[], results={}, request_id=request_id)

    lookups = [build_bulk_term_result(term) for term in terms]
    outcomes = await asyncio.gather(*lookups)

    by_term = {}
    for outcome in outcomes:
        by_term[outcome.term] = outcome

    return BulkDefinitionResponse(
        terms=terms,
        results=by_term,
        request_id=request_id,
    )
|
||||
|
||||
Reference in New Issue
Block a user