added multi-term search
This commit is contained in:
@@ -14,7 +14,7 @@ import httpx
|
||||
import structlog
|
||||
from asgi_correlation_id import CorrelationIdMiddleware
|
||||
from asgi_correlation_id.context import correlation_id
|
||||
from fastapi import FastAPI, Query, Response
|
||||
from fastapi import FastAPI, HTTPException, Query, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from selectolax.parser import HTMLParser
|
||||
@@ -45,6 +45,29 @@ class DefinitionResponse(BaseModel):
|
||||
taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
|
||||
|
||||
|
||||
class BulkTermMeta(BaseModel):
    """Per-term counters reported alongside a bulk lookup result."""

    # Number of definitions returned for the term.
    definitions_count: int
    # Number of taxonomy matches returned for the term.
    taxonomy_count: int
|
||||
|
||||
|
||||
class BulkTermResult(BaseModel):
    """Outcome of looking up a single term within a bulk request."""

    # The term this entry describes.
    term: str
    # Definitions gathered for the term; defaults to an empty list.
    results: List[Definition] = Field(default_factory=list)
    # Taxonomy matches for the term; defaults to an empty list.
    taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
    # Counters describing the two lists above.
    meta: BulkTermMeta
    # Message set when the lookup for this term failed; None otherwise.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class BulkDefinitionRequest(BaseModel):
    """Request body accepted by the bulk definitions endpoint."""

    # Raw terms as submitted by the client; normalized by the endpoint.
    terms: List[str]
|
||||
|
||||
|
||||
class BulkDefinitionResponse(BaseModel):
    """Response body returned by the bulk definitions endpoint."""

    # Normalized terms that were looked up.
    terms: List[str]
    # Per-term results, keyed by term.
    results: dict[str, BulkTermResult]
    # Correlation id of the request, when one is available.
    request_id: Optional[str] = None
|
||||
|
||||
|
||||
app = FastAPI(title="TermSearch API", version="0.1.0")
|
||||
|
||||
logging.basicConfig(format="%(message)s", level=logging.INFO)
|
||||
@@ -79,6 +102,21 @@ def normalize_text(text: str) -> str:
|
||||
return " ".join(text.lower().split())
|
||||
|
||||
|
||||
# Largest number of distinct terms accepted by one bulk request; the bulk
# endpoint answers with HTTP 400 when the normalized list is longer.
MAX_BULK_TERMS = 5
|
||||
|
||||
|
||||
def normalize_terms(raw_terms: List[str]) -> List[str]:
    """Return the stripped, non-empty terms of *raw_terms* without repeats.

    Surrounding whitespace is removed from every entry, blank entries are
    dropped, and only the first occurrence of each term is kept. Comparison
    is exact (case-sensitive) and the original order is preserved.
    """
    stripped = (candidate.strip() for candidate in raw_terms)
    # dict keys keep insertion order, so this drops later repeats only.
    return list(dict.fromkeys(term for term in stripped if term))
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def load_taxonomy() -> dict:
|
||||
root_dir = Path(__file__).resolve().parents[1]
|
||||
@@ -486,3 +524,73 @@ async def get_definitions(
|
||||
request_id=request_id,
|
||||
taxonomy=taxonomy,
|
||||
)
|
||||
|
||||
|
||||
async def build_bulk_term_result(term: str) -> BulkTermResult:
    """Look up one term across every definition source and the taxonomy.

    The three scrapers run concurrently. A source that raises is logged and
    skipped so the remaining sources still contribute their definitions.
    The term is reported as failed only when every source raised or an
    unexpected error occurs while assembling the result.

    Args:
        term: Term to look up.

    Returns:
        A ``BulkTermResult`` for *term*. On failure ``results`` and
        ``taxonomy`` are empty and ``error`` carries a generic message.
    """
    try:
        # return_exceptions=True keeps one failing source from discarding
        # what the other sources returned.
        outcomes = await asyncio.gather(
            scrape_dicionario_first(term),
            scrape_slb_first(term),
            scrape_merriam_first(term),
            return_exceptions=True,
        )

        results = []
        failures = []
        for outcome in outcomes:
            if isinstance(outcome, Exception):
                failures.append(outcome)
                logger.warning(
                    "bulk_source_failed",
                    term=term,
                    request_id=correlation_id.get(),
                    error=str(outcome),
                )
            elif isinstance(outcome, BaseException):
                # Cancellation and similar signals must not be swallowed.
                raise outcome
            elif outcome:
                results.append(outcome)

        if failures and len(failures) == len(outcomes):
            # No source succeeded: report the term as failed via the
            # fallback below.
            raise failures[0]

        taxonomy = find_taxonomy_matches(term)
        meta = BulkTermMeta(
            definitions_count=len(results),
            taxonomy_count=len(taxonomy),
        )
        return BulkTermResult(
            term=term,
            results=results,
            taxonomy=taxonomy,
            meta=meta,
        )
    except Exception as exc:  # noqa: BLE001
        # Boundary handler: a failing term must not fail the whole bulk
        # request, so the error is logged and reported per term.
        logger.exception(
            "bulk_term_failed",
            term=term,
            request_id=correlation_id.get(),
            error=str(exc),
        )
        meta = BulkTermMeta(definitions_count=0, taxonomy_count=0)
        return BulkTermResult(
            term=term,
            results=[],
            taxonomy=[],
            meta=meta,
            error="Failed to fetch definitions for this term.",
        )
|
||||
|
||||
|
||||
@app.post("/api/definitions/bulk", response_model=BulkDefinitionResponse)
async def get_definitions_bulk(
    payload: BulkDefinitionRequest,
    response: Response,
) -> BulkDefinitionResponse:
    """Look up several terms in one request.

    The submitted terms are normalized first. The request is rejected with
    HTTP 400 when more than ``MAX_BULK_TERMS`` terms remain; otherwise each
    term is looked up concurrently and the results are returned keyed by
    term. The correlation id, when present, is echoed in ``X-Request-ID``.
    """
    request_id = correlation_id.get()
    if request_id:
        response.headers["X-Request-ID"] = request_id

    terms = normalize_terms(payload.terms)
    if len(terms) > MAX_BULK_TERMS:
        raise HTTPException(
            status_code=400,
            detail=f"Maximum of {MAX_BULK_TERMS} terms allowed.",
        )

    results_map: dict[str, BulkTermResult] = {}
    if terms:
        # Nothing to gather when no usable term was submitted.
        lookups = [build_bulk_term_result(term) for term in terms]
        for item in await asyncio.gather(*lookups):
            results_map[item.term] = item

    return BulkDefinitionResponse(
        terms=terms,
        results=results_map,
        request_id=request_id,
    )
|
||||
|
||||
@@ -7,10 +7,23 @@ type Definition = {
|
||||
definition: string;
|
||||
};
|
||||
|
||||
type DefinitionResponse = {
|
||||
/** Per-term counters returned by the bulk definitions endpoint. */
type BulkTermMeta = {
  /** Number of definitions reported for the term. */
  definitions_count: number;
  /** Number of taxonomy matches reported for the term. */
  taxonomy_count: number;
};
|
||||
|
||||
/** Result entry for a single term of a bulk search. */
type BulkTermResult = {
  /** The term this entry belongs to. */
  term: string;
  /** Definitions returned for the term. */
  results: Definition[];
  /** Taxonomy matches for the term; may be absent. */
  taxonomy?: TaxonomyMatch[];
  /** Counters for the term's definitions and taxonomy matches. */
  meta: BulkTermMeta;
  /** Error message for the term; a truthy value counts the term as failed. */
  error?: string | null;
};
|
||||
|
||||
/** Response payload of POST /api/definitions/bulk. */
type BulkDefinitionResponse = {
  /** Terms in the order they should be displayed. */
  terms: string[];
  /** Per-term results keyed by term. */
  results: Record<string, BulkTermResult>;
  /** Request identifier reported by the API, if any. */
  request_id?: string | null;
};
|
||||
|
||||
type TaxonomyMatch = {
|
||||
@@ -26,20 +39,52 @@ type TaxonomyMatch = {
|
||||
const API_BASE_URL =
|
||||
import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000";
|
||||
|
||||
const MAX_TERMS = 5;
|
||||
|
||||
export default function App() {
|
||||
const [term, setTerm] = useState("");
|
||||
const [results, setResults] = useState<Definition[]>([]);
|
||||
const [taxonomy, setTaxonomy] = useState<TaxonomyMatch[]>([]);
|
||||
const [termInput, setTermInput] = useState("");
|
||||
const [resultsByTerm, setResultsByTerm] = useState<
|
||||
Record<string, BulkTermResult>
|
||||
>({});
|
||||
const [orderedTerms, setOrderedTerms] = useState<string[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const canSearch = term.trim().length > 0 && !loading;
|
||||
const parsedTerms = useMemo(() => {
|
||||
const terms: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const line of termInput.split(/\r?\n/)) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || seen.has(trimmed)) continue;
|
||||
terms.push(trimmed);
|
||||
seen.add(trimmed);
|
||||
}
|
||||
return terms;
|
||||
}, [termInput]);
|
||||
|
||||
const hasTooManyTerms = parsedTerms.length > MAX_TERMS;
|
||||
const canSearch = parsedTerms.length > 0 && !hasTooManyTerms && !loading;
|
||||
|
||||
const apiUrl = useMemo(() => {
|
||||
const url = new URL("/api/definitions", API_BASE_URL);
|
||||
url.searchParams.set("term", term.trim());
|
||||
const url = new URL("/api/definitions/bulk", API_BASE_URL);
|
||||
return url.toString();
|
||||
}, [term]);
|
||||
}, [API_BASE_URL]);
|
||||
|
||||
const summary = useMemo(() => {
|
||||
let definitions = 0;
|
||||
let taxonomy = 0;
|
||||
let failed = 0;
|
||||
|
||||
for (const term of orderedTerms) {
|
||||
const item = resultsByTerm[term];
|
||||
if (!item) continue;
|
||||
if (item.error) failed += 1;
|
||||
definitions += item.meta?.definitions_count ?? item.results?.length ?? 0;
|
||||
taxonomy += item.meta?.taxonomy_count ?? item.taxonomy?.length ?? 0;
|
||||
}
|
||||
|
||||
return { definitions, taxonomy, failed };
|
||||
}, [orderedTerms, resultsByTerm]);
|
||||
|
||||
const handleSearch = async (event: React.FormEvent) => {
|
||||
event.preventDefault();
|
||||
@@ -50,15 +95,32 @@ export default function App() {
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const response = await fetch(apiUrl);
|
||||
const response = await fetch(apiUrl, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({ terms: parsedTerms }),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch definitions.");
|
||||
let message = "Failed to fetch definitions.";
|
||||
try {
|
||||
const payload = (await response.json()) as { detail?: string };
|
||||
if (payload?.detail) {
|
||||
message = payload.detail;
|
||||
}
|
||||
} catch {
|
||||
// ignore JSON parsing errors
|
||||
}
|
||||
throw new Error(message);
|
||||
}
|
||||
const data = (await response.json()) as DefinitionResponse;
|
||||
setResults(data.results ?? []);
|
||||
setTaxonomy(data.taxonomy ?? []);
|
||||
const data = (await response.json()) as BulkDefinitionResponse;
|
||||
setOrderedTerms(data.terms ?? parsedTerms);
|
||||
setResultsByTerm(data.results ?? {});
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : "Something went wrong.");
|
||||
setOrderedTerms([]);
|
||||
setResultsByTerm({});
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
@@ -83,17 +145,28 @@ export default function App() {
|
||||
onSubmit={handleSearch}
|
||||
className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm"
|
||||
>
|
||||
<label className="text-sm font-medium text-slate-700" htmlFor="term">
|
||||
Search term
|
||||
</label>
|
||||
<div className="flex items-center justify-between">
|
||||
<label className="text-sm font-medium text-slate-700" htmlFor="terms">
|
||||
Search terms (one per line)
|
||||
</label>
|
||||
<span
|
||||
className={`text-xs ${
|
||||
hasTooManyTerms ? "text-rose-600" : "text-slate-500"
|
||||
}`}
|
||||
>
|
||||
{parsedTerms.length}/{MAX_TERMS} terms
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex flex-col gap-3 sm:flex-row">
|
||||
<input
|
||||
id="term"
|
||||
name="term"
|
||||
type="text"
|
||||
value={term}
|
||||
onChange={(event) => setTerm(event.target.value)}
|
||||
placeholder="Ex: gas lift"
|
||||
<textarea
|
||||
id="terms"
|
||||
name="terms"
|
||||
rows={4}
|
||||
value={termInput}
|
||||
onChange={(
|
||||
event: React.ChangeEvent<HTMLTextAreaElement>
|
||||
) => setTermInput(event.target.value)}
|
||||
placeholder={`Ex:\ngas lift\npump\nflow assurance`}
|
||||
className="flex-1 rounded-xl border border-slate-200 px-4 py-3 text-base focus:border-sky-500 focus:outline-none focus:ring-2 focus:ring-sky-200"
|
||||
/>
|
||||
<button
|
||||
@@ -104,16 +177,28 @@ export default function App() {
|
||||
{loading ? "Searching..." : "Search"}
|
||||
</button>
|
||||
</div>
|
||||
{hasTooManyTerms ? (
|
||||
<p className="text-xs text-rose-600">
|
||||
Maximum of {MAX_TERMS} terms allowed. Please remove extra terms to
|
||||
continue.
|
||||
</p>
|
||||
) : (
|
||||
<p className="text-xs text-slate-500">
|
||||
Maximum of {MAX_TERMS} terms per search.
|
||||
</p>
|
||||
)}
|
||||
<p className="text-xs text-slate-500">
|
||||
API base: <span className="font-medium">{API_BASE_URL}</span>
|
||||
</p>
|
||||
</form>
|
||||
|
||||
<section className="space-y-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex flex-wrap items-center justify-between gap-2">
|
||||
<h2 className="text-xl font-semibold text-slate-800">Results</h2>
|
||||
<span className="text-sm text-slate-500">
|
||||
{results.length} {results.length === 1 ? "source" : "sources"}
|
||||
{summary.definitions} definitions · {summary.taxonomy} taxonomy
|
||||
matches · {summary.failed} failed term
|
||||
{summary.failed === 1 ? "" : "s"}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -123,79 +208,121 @@ export default function App() {
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
{results.length === 0 && !loading ? (
|
||||
{orderedTerms.length === 0 && !loading ? (
|
||||
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
|
||||
No definitions yet. Try searching for a term.
|
||||
No definitions yet. Add terms and search.
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
<div className="space-y-3">
|
||||
{results.map((result) => (
|
||||
<article
|
||||
key={`${result.source}-${result.title}`}
|
||||
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
|
||||
>
|
||||
<h3 className="text-sm font-semibold uppercase tracking-wide text-sky-600">
|
||||
{result.source}
|
||||
</h3>
|
||||
<p className="mt-2 text-lg font-semibold text-slate-900">
|
||||
{result.title}
|
||||
</p>
|
||||
<a
|
||||
href={result.url}
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
|
||||
<div className="space-y-6">
|
||||
{orderedTerms.map((term) => {
|
||||
const item = resultsByTerm[term];
|
||||
const definitions = item?.results ?? [];
|
||||
const taxonomy = item?.taxonomy ?? [];
|
||||
const termError = item?.error;
|
||||
|
||||
return (
|
||||
<section
|
||||
key={term}
|
||||
className="space-y-4 rounded-2xl border border-slate-100 bg-white p-5 shadow-sm"
|
||||
>
|
||||
View source
|
||||
</a>
|
||||
<p className="mt-2 text-base text-slate-700">
|
||||
{result.definition}
|
||||
</p>
|
||||
</article>
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section className="space-y-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h2 className="text-xl font-semibold text-slate-800">
|
||||
ISO 14224 Taxonomy
|
||||
</h2>
|
||||
<span className="text-sm text-slate-500">
|
||||
{taxonomy.length} {taxonomy.length === 1 ? "match" : "matches"}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{taxonomy.length === 0 && !loading ? (
|
||||
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
|
||||
No taxonomy matches found.
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
<div className="space-y-3">
|
||||
{taxonomy.map((item) => (
|
||||
<article
|
||||
key={`${item.class_code}-${item.type_code ?? "class"}`}
|
||||
className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
|
||||
>
|
||||
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
|
||||
{item.category}
|
||||
</p>
|
||||
<h3 className="mt-2 text-lg font-semibold text-slate-900">
|
||||
{item.full_name}
|
||||
</h3>
|
||||
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
|
||||
<span>Class: {item.class_name} ({item.class_code})</span>
|
||||
{item.type_description ? (
|
||||
<span>
|
||||
Type: {item.type_description} ({item.type_code})
|
||||
<div className="flex flex-wrap items-center justify-between gap-2">
|
||||
<h3 className="text-lg font-semibold text-slate-900">
|
||||
{term}
|
||||
</h3>
|
||||
<span className="text-xs text-slate-500">
|
||||
{definitions.length} source
|
||||
{definitions.length === 1 ? "" : "s"} · {taxonomy.length}
|
||||
{taxonomy.length === 1 ? " match" : " matches"}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{termError ? (
|
||||
<div className="rounded-xl border border-rose-200 bg-rose-50 p-4 text-sm text-rose-600">
|
||||
{termError}
|
||||
</div>
|
||||
) : null}
|
||||
{item.annex ? <span>Annex: {item.annex}</span> : null}
|
||||
</div>
|
||||
</article>
|
||||
))}
|
||||
|
||||
<div className="space-y-3">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
|
||||
Definitions
|
||||
</h4>
|
||||
{definitions.length === 0 ? (
|
||||
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
|
||||
No definitions found for this term.
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{definitions.map((result) => (
|
||||
<article
|
||||
key={`${term}-${result.source}-${result.title}`}
|
||||
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
|
||||
>
|
||||
<h5 className="text-xs font-semibold uppercase tracking-wide text-sky-600">
|
||||
{result.source}
|
||||
</h5>
|
||||
<p className="mt-2 text-base font-semibold text-slate-900">
|
||||
{result.title}
|
||||
</p>
|
||||
<a
|
||||
href={result.url}
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
|
||||
>
|
||||
View source
|
||||
</a>
|
||||
<p className="mt-2 text-sm text-slate-700">
|
||||
{result.definition}
|
||||
</p>
|
||||
</article>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="space-y-3">
|
||||
<h4 className="text-xs font-semibold uppercase tracking-wide text-slate-500">
|
||||
ISO 14224 Taxonomy
|
||||
</h4>
|
||||
{taxonomy.length === 0 ? (
|
||||
<div className="rounded-xl border border-dashed border-slate-200 bg-white p-4 text-sm text-slate-500">
|
||||
No taxonomy matches found for this term.
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{taxonomy.map((item) => (
|
||||
<article
|
||||
key={`${term}-${item.class_code}-${
|
||||
item.type_code ?? "class"
|
||||
}`}
|
||||
className="rounded-xl border border-slate-100 bg-white p-4 shadow-sm"
|
||||
>
|
||||
<p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
|
||||
{item.category}
|
||||
</p>
|
||||
<h5 className="mt-2 text-base font-semibold text-slate-900">
|
||||
{item.full_name}
|
||||
</h5>
|
||||
<div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
|
||||
<span>
|
||||
Class: {item.class_name} ({item.class_code})
|
||||
</span>
|
||||
{item.type_description ? (
|
||||
<span>
|
||||
Type: {item.type_description} ({item.type_code})
|
||||
</span>
|
||||
) : null}
|
||||
{item.annex ? <span>Annex: {item.annex}</span> : null}
|
||||
</div>
|
||||
</article>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</section>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user