initial commit
This commit is contained in:
17
.gitignore
vendored
Normal file
17
.gitignore
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# Node
|
||||||
|
node_modules
|
||||||
|
dist
|
||||||
|
pnpm-debug.log*
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
13
.vscode/tasks.json
vendored
Normal file
13
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"version": "2.0.0",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"label": "noop",
|
||||||
|
"type": "shell",
|
||||||
|
"command": "echo",
|
||||||
|
"args": [
|
||||||
|
"noop"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
28
README.md
Normal file
28
README.md
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# TermSearch
|
||||||
|
|
||||||
|
Monorepo with a FastAPI backend and React + Tailwind frontend.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
- Backend runs on http://localhost:8000
|
||||||
|
- Frontend runs on http://localhost:5173
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
Build and start both services:
|
||||||
|
|
||||||
|
- `docker compose up --build`
|
||||||
|
|
||||||
|
### Local (without Docker)
|
||||||
|
|
||||||
|
Backend:
|
||||||
|
|
||||||
|
- `cd backend`
|
||||||
|
- `poetry install`
|
||||||
|
- `poetry run uvicorn app.main:app --reload`
|
||||||
|
|
||||||
|
Frontend:
|
||||||
|
|
||||||
|
- `cd frontend`
|
||||||
|
- `pnpm install`
|
||||||
|
- `pnpm dev`
|
||||||
18
backend/Dockerfile
Normal file
18
backend/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
ENV POETRY_VERSION=1.8.3
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir "poetry==$POETRY_VERSION"
|
||||||
|
|
||||||
|
COPY pyproject.toml poetry.lock* /app/
|
||||||
|
|
||||||
|
RUN poetry config virtualenvs.create false \
|
||||||
|
&& poetry install --no-interaction --no-ansi
|
||||||
|
|
||||||
|
COPY . /app
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["poetry", "run", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
13
backend/README.md
Normal file
13
backend/README.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# TermSearch Backend
|
||||||
|
|
||||||
|
FastAPI service that will provide definitions from oil & gas glossary sources.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
- `poetry install`
|
||||||
|
- `poetry run uvicorn app.main:app --reload`
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
- `GET /api/health`
|
||||||
|
- `GET /api/definitions?term=...`
|
||||||
0
backend/app/__init__.py
Normal file
0
backend/app/__init__.py
Normal file
488
backend/app/main.py
Normal file
488
backend/app/main.py
Normal file
@@ -0,0 +1,488 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from functools import lru_cache
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import structlog
|
||||||
|
from asgi_correlation_id import CorrelationIdMiddleware
|
||||||
|
from asgi_correlation_id.context import correlation_id
|
||||||
|
from fastapi import FastAPI, Query, Response
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from selectolax.parser import HTMLParser
|
||||||
|
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
||||||
|
|
||||||
|
|
||||||
|
class Definition(BaseModel):
    """A single glossary definition scraped from one external source."""

    # Identifier of the scraper that produced the entry
    # (e.g. "slb-glossary", "merriam-webster", "dicionariopetroleoegas").
    source: str
    # Display title of the matched entry; scrapers fall back to the search term.
    title: str
    # URL of the page the definition text was extracted from.
    url: str
    # Whitespace-collapsed definition text.
    definition: str
|
||||||
|
|
||||||
|
|
||||||
|
class TaxonomyMatch(BaseModel):
    """A match of the search term against the ISO 14224 equipment taxonomy."""

    # Top-level taxonomy category the match belongs to.
    category: str
    # Equipment class name and its code.
    class_name: str
    class_code: str
    # Populated only for type-level matches; None for class-level matches.
    type_description: Optional[str] = None
    type_code: Optional[str] = None
    # ISO annex reference for the class, when present in the taxonomy file.
    annex: Optional[str] = None
    # "<type description> <class name>" for type matches, or just the class name.
    full_name: str
|
||||||
|
|
||||||
|
|
||||||
|
class DefinitionResponse(BaseModel):
    """Response body of GET /api/definitions."""

    # The search term as received.
    term: str
    # One Definition per source that returned a result (failed sources are omitted).
    results: List[Definition]
    # Correlation id echoed from the X-Request-ID middleware, when available.
    request_id: Optional[str] = None
    # ISO 14224 taxonomy matches for the term (may be empty).
    taxonomy: List[TaxonomyMatch] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# FastAPI application; title/version appear in the generated OpenAPI docs.
app = FastAPI(title="TermSearch API", version="0.1.0")

# Structured JSON logging: stdlib logging emits the rendered message only;
# structlog adds an ISO timestamp and level and renders each record as JSON
# with the message under the "event" key.
logging.basicConfig(format="%(message)s", level=logging.INFO)
structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.add_log_level,
        structlog.processors.EventRenamer("event"),
        structlog.processors.JSONRenderer(),
    ]
)
logger = structlog.get_logger("termsearch")

# Attach (or propagate) a per-request correlation id via the X-Request-ID header.
app.add_middleware(
    CorrelationIdMiddleware,
    header_name="X-Request-ID",
)

# CORS: FRONTEND_ORIGIN may be a comma-separated list of origins;
# defaults to the Vite dev server.
frontend_origin = os.getenv("FRONTEND_ORIGIN", "http://localhost:5173")
allowed_origins = [origin.strip() for origin in frontend_origin.split(",") if origin.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_text(text: str) -> str:
    """Lowercase *text* and collapse every run of whitespace to a single space."""
    tokens = text.lower().split()
    return " ".join(tokens)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def load_taxonomy() -> dict:
    """Load the ISO 14224 taxonomy JSON once and memoize it for the process.

    Resolves the file relative to the backend package directory first
    (``<backend>/data/iso/iso-14244-tax.json``); if absent, falls back to an
    absolute ``/data/iso`` path — presumably a container mount point
    (TODO confirm; docker-compose mounts ./data to /app/data).

    Returns the parsed JSON document as a dict. Raises FileNotFoundError /
    json.JSONDecodeError if neither path holds a valid file.
    """
    root_dir = Path(__file__).resolve().parents[1]
    tax_path = root_dir / "data" / "iso" / "iso-14244-tax.json"
    if not tax_path.exists():
        # Fallback location when the repo layout is not present.
        tax_path = Path("/data/iso/iso-14244-tax.json")
    with tax_path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def find_taxonomy_matches(term: str) -> List[TaxonomyMatch]:
    """Return ISO 14224 taxonomy entries whose names contain all term tokens.

    Matching is case-insensitive and token-based: a candidate name matches
    when every whitespace-separated token of *term* occurs as a substring of
    the whitespace-normalized candidate. Type-level matches populate
    ``type_description``/``type_code``; class-level matches leave them None.

    Returns an empty list for a blank term. Malformed taxonomy entries
    (non-dict classes/types) are skipped instead of raising.
    """
    normalized_term = normalize_text(term)
    term_tokens = normalized_term.split()
    if not term_tokens:
        return []

    def _matches(candidate: str) -> bool:
        # True when every search token appears somewhere in the candidate name.
        normalized = normalize_text(candidate)
        return all(token in normalized for token in term_tokens)

    data = load_taxonomy()
    taxonomy = data.get("taxonomy", {}) if isinstance(data, dict) else {}
    categories = taxonomy.get("categories", {}) if isinstance(taxonomy, dict) else {}
    matches: List[TaxonomyMatch] = []

    for category_name, category in categories.items():
        classes = category.get("classes", {}) if isinstance(category, dict) else {}
        for class_code, class_info in classes.items():
            # Guard BEFORE any .get(): the original called class_info.get()
            # first and only isinstance-checked the later "types" lookup, so a
            # non-dict class entry raised AttributeError.
            if not isinstance(class_info, dict):
                continue
            class_name = class_info.get("name", "")
            annex = class_info.get("annex")
            types = class_info.get("types", {})

            for type_code, type_info in types.items():
                # Same defensive treatment for type entries.
                type_description = (
                    type_info.get("description", "")
                    if isinstance(type_info, dict)
                    else ""
                )
                full_name = f"{type_description} {class_name}".strip()
                if _matches(full_name):
                    matches.append(
                        TaxonomyMatch(
                            category=category_name,
                            class_name=class_name,
                            class_code=class_code,
                            type_description=type_description,
                            type_code=type_code,
                            annex=annex,
                            full_name=full_name,
                        )
                    )

            # Additionally surface a class-level match (no type information).
            if class_name and _matches(class_name):
                matches.append(
                    TaxonomyMatch(
                        category=category_name,
                        class_name=class_name,
                        class_code=class_code,
                        annex=annex,
                        full_name=class_name,
                    )
                )

    return matches
|
||||||
|
|
||||||
|
|
||||||
|
@retry(
    retry=retry_if_exception_type((httpx.RequestError, httpx.TimeoutException)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=0.5, min=0.5, max=4),
)
async def fetch_text(client: httpx.AsyncClient, url: str) -> str:
    """GET *url* and return the response body as text.

    Retries up to 3 attempts with exponential backoff on network/timeout
    errors; raises httpx.HTTPStatusError for 4xx/5xx responses.
    """
    response = await client.get(url)
    response.raise_for_status()
    return response.text
|
||||||
|
|
||||||
|
|
||||||
|
@retry(
    retry=retry_if_exception_type((httpx.RequestError, httpx.TimeoutException)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=0.5, min=0.5, max=4),
)
async def fetch_json(
    client: httpx.AsyncClient,
    url: str,
    data: dict[str, str],
) -> dict:
    """POST *data* as a form-encoded body to *url* and return the parsed JSON.

    Retries up to 3 attempts with exponential backoff on network/timeout
    errors; raises httpx.HTTPStatusError for 4xx/5xx responses and
    json.JSONDecodeError for a non-JSON body.
    """
    response = await client.post(
        url,
        data=data,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
@retry(
    retry=retry_if_exception_type((httpx.RequestError, httpx.TimeoutException)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=0.5, min=0.5, max=4),
)
async def fetch_json_get(client: httpx.AsyncClient, url: str) -> dict:
    """GET *url* and return the parsed JSON body.

    Retries up to 3 attempts with exponential backoff on network/timeout
    errors; raises httpx.HTTPStatusError for 4xx/5xx responses.
    """
    response = await client.get(url)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
async def scrape_dicionario_first(term: str) -> Optional[Definition]:
    """Scrape the first result for *term* from dicionariopetroleoegas.com.br.

    Runs the site search, follows the first result link, and extracts the
    article body. Returns None (never raises) when there is no result, the
    markup does not match the expected selectors, or an HTTP/network error
    occurs; each outcome is logged with the request correlation id.
    """
    search_url = f"https://dicionariopetroleoegas.com.br/?s={quote_plus(term)}"
    timeout = httpx.Timeout(10.0, connect=5.0)
    start_time = time.perf_counter()

    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            search_html = await fetch_text(client, search_url)
            search_parser = HTMLParser(search_html)
            # First hit in the site's search-result list.
            first_link = search_parser.css_first("div.definitionlist a")

            if not first_link:
                logger.info(
                    "no_results",
                    source="dicionariopetroleoegas",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            detail_url = first_link.attributes.get("href")
            if not detail_url:
                logger.warning(
                    "missing_detail_url",
                    source="dicionariopetroleoegas",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            detail_html = await fetch_text(client, detail_url)
            detail_parser = HTMLParser(detail_html)
            article_node = detail_parser.css_first("div.maincontent article")
            if not article_node:
                logger.warning(
                    "missing_article",
                    source="dicionariopetroleoegas",
                    term=term,
                    url=detail_url,
                    request_id=correlation_id.get(),
                )
                return None

            # Collapse all whitespace runs in the extracted article text.
            definition_text = " ".join(article_node.text().split())
            elapsed_ms = (time.perf_counter() - start_time) * 1000

            logger.info(
                "scrape_success",
                source="dicionariopetroleoegas",
                term=term,
                url=detail_url,
                elapsed_ms=round(elapsed_ms, 2),
                request_id=correlation_id.get(),
            )

            # Prefer the link's title attribute; fall back to the search term.
            title = first_link.attributes.get("title") or term

            return Definition(
                source="dicionariopetroleoegas",
                title=title,
                url=detail_url,
                definition=definition_text,
            )
    except httpx.HTTPStatusError as exc:
        logger.warning(
            "http_status_error",
            source="dicionariopetroleoegas",
            term=term,
            url=str(exc.request.url),
            status_code=exc.response.status_code,
            request_id=correlation_id.get(),
        )
    except httpx.RequestError as exc:
        logger.warning(
            "network_error",
            source="dicionariopetroleoegas",
            term=term,
            url=str(exc.request.url) if exc.request else search_url,
            error=str(exc),
            request_id=correlation_id.get(),
        )

    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def scrape_slb_first(term: str) -> Optional[Definition]:
    """Scrape the first SLB Oilfield Glossary hit for *term* via its Coveo API.

    Queries the glossary's Coveo search endpoint, follows the first result's
    URL, and extracts the definition body. Returns None (never raises) on
    empty results, unexpected markup, or HTTP/network errors; each outcome
    is logged with the request correlation id.
    """
    search_url = "https://glossary.slb.com/coveo/rest/search/v2?siteName=OilfieldGlossary"
    # Coveo search parameters; "aq" presumably pins results to the glossary
    # content tree — confirm against the live endpoint before changing.
    payload = {
        "q": term,
        "aq": "(@z95xpath==28F6D9B16B684F7C9BE6937026AB0B6B)",
        "searchHub": "OilfieldGlossarySearchPage",
        "locale": "en",
        "pipeline": "SLBCom",
        "numberOfResults": "12",
    }
    timeout = httpx.Timeout(10.0, connect=5.0)
    start_time = time.perf_counter()

    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            search_json = await fetch_json(client, search_url, payload)
            results = search_json.get("results", []) if isinstance(search_json, dict) else []
            if not results:
                logger.info(
                    "no_results",
                    source="slb-glossary",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            first_result = results[0]
            # Prefer the printable URL; fall back to the click-through URL.
            detail_url = first_result.get("printableUri") or first_result.get("clickUri")
            if not detail_url:
                logger.warning(
                    "missing_detail_url",
                    source="slb-glossary",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            detail_html = await fetch_text(client, detail_url)
            detail_parser = HTMLParser(detail_html)
            content_node = detail_parser.css_first("div.content-two-col__text")
            if not content_node:
                logger.warning(
                    "missing_article",
                    source="slb-glossary",
                    term=term,
                    url=detail_url,
                    request_id=correlation_id.get(),
                )
                return None

            # Collapse all whitespace runs in the extracted text.
            definition_text = " ".join(content_node.text().split())
            elapsed_ms = (time.perf_counter() - start_time) * 1000

            logger.info(
                "scrape_success",
                source="slb-glossary",
                term=term,
                url=detail_url,
                elapsed_ms=round(elapsed_ms, 2),
                request_id=correlation_id.get(),
            )

            raw = first_result.get("raw", {}) if isinstance(first_result, dict) else {}
            # "mainz32xtitle" looks like Coveo's encoded field name for the
            # page title — TODO confirm against the search response schema.
            title = raw.get("mainz32xtitle") or term

            return Definition(
                source="slb-glossary",
                title=title,
                url=detail_url,
                definition=definition_text,
            )
    except httpx.HTTPStatusError as exc:
        logger.warning(
            "http_status_error",
            source="slb-glossary",
            term=term,
            url=str(exc.request.url),
            status_code=exc.response.status_code,
            request_id=correlation_id.get(),
        )
    except httpx.RequestError as exc:
        logger.warning(
            "network_error",
            source="slb-glossary",
            term=term,
            url=str(exc.request.url) if exc.request else search_url,
            error=str(exc),
            request_id=correlation_id.get(),
        )

    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def scrape_merriam_first(term: str) -> Optional[Definition]:
    """Scrape the first Merriam-Webster definition for *term*.

    Resolves the term to an entry slug via the site's autocomplete API,
    fetches the entry page, and extracts the first sense text. Returns None
    (never raises) on empty results, unexpected markup, or HTTP/network
    errors; each outcome is logged with the request correlation id.
    """
    search_url = (
        "https://www.merriam-webster.com/lapi/v1/mwol-search/autocomplete"
        f"?search={quote_plus(term)}"
    )
    timeout = httpx.Timeout(10.0, connect=5.0)
    start_time = time.perf_counter()

    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            search_json = await fetch_json_get(client, search_url)
            docs = search_json.get("docs", []) if isinstance(search_json, dict) else []
            if not docs:
                logger.info(
                    "no_results",
                    source="merriam-webster",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            first_doc = docs[0] if isinstance(docs[0], dict) else {}
            slug = first_doc.get("slug")
            title = first_doc.get("word") or term
            if not slug:
                logger.warning(
                    "missing_detail_url",
                    source="merriam-webster",
                    term=term,
                    url=search_url,
                    request_id=correlation_id.get(),
                )
                return None

            # The slug is appended directly to the host, so it is assumed to
            # be a site-relative path starting with "/" — TODO confirm.
            detail_url = f"https://www.merriam-webster.com{slug}"
            detail_html = await fetch_text(client, detail_url)
            detail_parser = HTMLParser(detail_html)

            # First sense text on the entry page.
            content_node = detail_parser.css_first("span.dtText")
            if not content_node:
                logger.warning(
                    "missing_article",
                    source="merriam-webster",
                    term=term,
                    url=detail_url,
                    request_id=correlation_id.get(),
                )
                return None

            definition_text = " ".join(content_node.text().split())
            # Sense text renders as ": definition"; strip the leading colon.
            definition_text = definition_text.lstrip(":").strip()
            elapsed_ms = (time.perf_counter() - start_time) * 1000

            logger.info(
                "scrape_success",
                source="merriam-webster",
                term=term,
                url=detail_url,
                elapsed_ms=round(elapsed_ms, 2),
                request_id=correlation_id.get(),
            )

            return Definition(
                source="merriam-webster",
                title=title,
                url=detail_url,
                definition=definition_text,
            )
    except httpx.HTTPStatusError as exc:
        logger.warning(
            "http_status_error",
            source="merriam-webster",
            term=term,
            url=str(exc.request.url),
            status_code=exc.response.status_code,
            request_id=correlation_id.get(),
        )
    except httpx.RequestError as exc:
        logger.warning(
            "network_error",
            source="merriam-webster",
            term=term,
            url=str(exc.request.url) if exc.request else search_url,
            error=str(exc),
            request_id=correlation_id.get(),
        )

    return None
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/health")
def health() -> dict:
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/definitions", response_model=DefinitionResponse)
async def get_definitions(
    response: Response,
    term: str = Query(min_length=1),
) -> DefinitionResponse:
    """Fan the search term out to every scraper and return aggregated results.

    Sources are queried concurrently; a source that fails or returns nothing
    is dropped rather than failing the whole request. The correlation id is
    echoed back in the X-Request-ID response header and in the body.
    """
    request_id = correlation_id.get()
    if request_id:
        response.headers["X-Request-ID"] = request_id

    # return_exceptions=True so one crashing scraper cannot 500 the endpoint:
    # the scrapers catch httpx errors themselves, but anything unexpected
    # (e.g. a JSON decode error) would otherwise propagate out of gather().
    outcomes = await asyncio.gather(
        scrape_dicionario_first(term),
        scrape_slb_first(term),
        scrape_merriam_first(term),
        return_exceptions=True,
    )

    results: List[Definition] = []
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            logger.warning(
                "scraper_failed",
                term=term,
                error=str(outcome),
                request_id=request_id,
            )
        elif outcome:
            results.append(outcome)

    taxonomy = find_taxonomy_matches(term)

    return DefinitionResponse(
        term=term,
        results=results,
        request_id=request_id,
        taxonomy=taxonomy,
    )
|
||||||
24
backend/pyproject.toml
Normal file
24
backend/pyproject.toml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "termsearch-backend"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "FastAPI backend for TermSearch"
|
||||||
|
authors = ["Your Name <you@example.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
packages = [{ include = "app" }]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.12"
|
||||||
|
fastapi = "^0.111.0"
|
||||||
|
uvicorn = { extras = ["standard"], version = "^0.30.0" }
|
||||||
|
httpx = "^0.27.0"
|
||||||
|
selectolax = "^0.3.21"
|
||||||
|
tenacity = "^8.3.0"
|
||||||
|
structlog = "^24.4.0"
|
||||||
|
asgi-correlation-id = "^4.3.1"
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
ruff = "^0.5.0"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
4290
data/iso/iso-14244-tax.json
Normal file
4290
data/iso/iso-14244-tax.json
Normal file
File diff suppressed because it is too large
Load Diff
24
docker-compose.yml
Normal file
24
docker-compose.yml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
services:
|
||||||
|
backend:
|
||||||
|
build: ./backend
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
volumes:
|
||||||
|
- ./backend:/app
|
||||||
|
- ./data:/app/data
|
||||||
|
environment:
|
||||||
|
- FRONTEND_ORIGIN=http://localhost:5173
|
||||||
|
command: poetry run uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
build: ./frontend
|
||||||
|
ports:
|
||||||
|
- "5173:5173"
|
||||||
|
volumes:
|
||||||
|
- ./frontend:/app
|
||||||
|
- /app/node_modules
|
||||||
|
environment:
|
||||||
|
- VITE_API_BASE_URL=http://localhost:8000
|
||||||
|
command: pnpm dev --host 0.0.0.0 --port 5173
|
||||||
|
depends_on:
|
||||||
|
- backend
|
||||||
15
frontend/Dockerfile
Normal file
15
frontend/Dockerfile
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
FROM node:20-alpine
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN corepack enable && corepack prepare pnpm@9.12.3 --activate
|
||||||
|
|
||||||
|
COPY package.json pnpm-lock.yaml* /app/
|
||||||
|
|
||||||
|
RUN pnpm install
|
||||||
|
|
||||||
|
COPY . /app
|
||||||
|
|
||||||
|
EXPOSE 5173
|
||||||
|
|
||||||
|
CMD ["pnpm", "dev", "--host", "0.0.0.0", "--port", "5173"]
|
||||||
12
frontend/index.html
Normal file
12
frontend/index.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>TermSearch</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="root"></div>
|
||||||
|
<script type="module" src="/src/main.tsx"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
25
frontend/package.json
Normal file
25
frontend/package.json
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"name": "termsearch-frontend",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "vite",
|
||||||
|
"build": "vite build",
|
||||||
|
"preview": "vite preview"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"react": "^18.3.1",
|
||||||
|
"react-dom": "^18.3.1"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/react": "^18.3.3",
|
||||||
|
"@types/react-dom": "^18.3.0",
|
||||||
|
"@vitejs/plugin-react": "^4.3.1",
|
||||||
|
"autoprefixer": "^10.4.20",
|
||||||
|
"postcss": "^8.4.41",
|
||||||
|
"tailwindcss": "^3.4.10",
|
||||||
|
"typescript": "^5.5.4",
|
||||||
|
"vite": "^5.4.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
6
frontend/postcss.config.cjs
Normal file
6
frontend/postcss.config.cjs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
module.exports = {
|
||||||
|
plugins: {
|
||||||
|
tailwindcss: {},
|
||||||
|
autoprefixer: {},
|
||||||
|
},
|
||||||
|
};
|
||||||
204
frontend/src/App.tsx
Normal file
204
frontend/src/App.tsx
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
import { useMemo, useState } from "react";

// Shape of one scraped glossary definition (mirrors the backend Definition model).
type Definition = {
  source: string;
  title: string;
  url: string;
  definition: string;
};

// Payload of GET /api/definitions.
type DefinitionResponse = {
  term: string;
  results: Definition[];
  taxonomy?: TaxonomyMatch[];
};

// One ISO 14224 taxonomy match (mirrors the backend TaxonomyMatch model;
// type_* fields are only set for type-level matches).
type TaxonomyMatch = {
  category: string;
  class_name: string;
  class_code: string;
  type_description?: string | null;
  type_code?: string | null;
  annex?: string | null;
  full_name: string;
};

// Backend base URL, overridable at build time via VITE_API_BASE_URL.
const API_BASE_URL =
  import.meta.env.VITE_API_BASE_URL?.toString() || "http://localhost:8000";
|
||||||
|
|
||||||
|
// Root UI: a search form that queries the backend /api/definitions endpoint
// and renders glossary results plus ISO 14224 taxonomy matches.
export default function App() {
  const [term, setTerm] = useState("");
  const [results, setResults] = useState<Definition[]>([]);
  const [taxonomy, setTaxonomy] = useState<TaxonomyMatch[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Disable submission for blank input and while a request is in flight.
  const canSearch = term.trim().length > 0 && !loading;

  // Request URL derived from the current (trimmed) search term.
  const apiUrl = useMemo(() => {
    const url = new URL("/api/definitions", API_BASE_URL);
    url.searchParams.set("term", term.trim());
    return url.toString();
  }, [term]);

  // Fetch definitions + taxonomy; surfaces failures via the error banner.
  const handleSearch = async (event: React.FormEvent) => {
    event.preventDefault();

    if (!canSearch) return;

    setLoading(true);
    setError(null);

    try {
      const response = await fetch(apiUrl);
      if (!response.ok) {
        throw new Error("Failed to fetch definitions.");
      }
      const data = (await response.json()) as DefinitionResponse;
      setResults(data.results ?? []);
      setTaxonomy(data.taxonomy ?? []);
    } catch (err) {
      setError(err instanceof Error ? err.message : "Something went wrong.");
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="min-h-screen px-6 py-12">
      <div className="mx-auto flex w-full max-w-3xl flex-col gap-8">
        <header className="space-y-3">
          <p className="text-sm font-semibold uppercase tracking-wide text-sky-600">
            TermSearch
          </p>
          <h1 className="text-4xl font-semibold text-slate-900">
            Oil & Gas term definitions
          </h1>
          <p className="text-base text-slate-600">
            Search multiple glossary sources from a single interface.
          </p>
        </header>

        {/* Search form */}
        <form
          onSubmit={handleSearch}
          className="flex flex-col gap-4 rounded-2xl bg-white p-6 shadow-sm"
        >
          <label className="text-sm font-medium text-slate-700" htmlFor="term">
            Search term
          </label>
          <div className="flex flex-col gap-3 sm:flex-row">
            <input
              id="term"
              name="term"
              type="text"
              value={term}
              onChange={(event) => setTerm(event.target.value)}
              placeholder="Ex: gas lift"
              className="flex-1 rounded-xl border border-slate-200 px-4 py-3 text-base focus:border-sky-500 focus:outline-none focus:ring-2 focus:ring-sky-200"
            />
            <button
              type="submit"
              disabled={!canSearch}
              className="rounded-xl bg-sky-600 px-6 py-3 text-base font-semibold text-white transition hover:bg-sky-700 disabled:cursor-not-allowed disabled:bg-slate-300"
            >
              {loading ? "Searching..." : "Search"}
            </button>
          </div>
          <p className="text-xs text-slate-500">
            API base: <span className="font-medium">{API_BASE_URL}</span>
          </p>
        </form>

        {/* Glossary results, one card per source */}
        <section className="space-y-4">
          <div className="flex items-center justify-between">
            <h2 className="text-xl font-semibold text-slate-800">Results</h2>
            <span className="text-sm text-slate-500">
              {results.length} {results.length === 1 ? "source" : "sources"}
            </span>
          </div>

          {error ? (
            <div className="rounded-xl border border-rose-200 bg-rose-50 p-4 text-sm text-rose-600">
              {error}
            </div>
          ) : null}

          {results.length === 0 && !loading ? (
            <div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
              No definitions yet. Try searching for a term.
            </div>
          ) : null}

          <div className="space-y-3">
            {results.map((result) => (
              <article
                key={`${result.source}-${result.title}`}
                className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
              >
                <h3 className="text-sm font-semibold uppercase tracking-wide text-sky-600">
                  {result.source}
                </h3>
                <p className="mt-2 text-lg font-semibold text-slate-900">
                  {result.title}
                </p>
                <a
                  href={result.url}
                  target="_blank"
                  rel="noreferrer"
                  className="mt-2 inline-flex text-sm font-medium text-sky-600 hover:text-sky-700"
                >
                  View source
                </a>
                <p className="mt-2 text-base text-slate-700">
                  {result.definition}
                </p>
              </article>
            ))}
          </div>
        </section>

        {/* ISO 14224 taxonomy matches */}
        <section className="space-y-4">
          <div className="flex items-center justify-between">
            <h2 className="text-xl font-semibold text-slate-800">
              ISO 14224 Taxonomy
            </h2>
            <span className="text-sm text-slate-500">
              {taxonomy.length} {taxonomy.length === 1 ? "match" : "matches"}
            </span>
          </div>

          {taxonomy.length === 0 && !loading ? (
            <div className="rounded-xl border border-dashed border-slate-200 bg-white p-6 text-sm text-slate-500">
              No taxonomy matches found.
            </div>
          ) : null}

          <div className="space-y-3">
            {taxonomy.map((item) => (
              <article
                key={`${item.class_code}-${item.type_code ?? "class"}`}
                className="rounded-xl border border-slate-100 bg-white p-5 shadow-sm"
              >
                <p className="text-xs font-semibold uppercase tracking-wide text-emerald-600">
                  {item.category}
                </p>
                <h3 className="mt-2 text-lg font-semibold text-slate-900">
                  {item.full_name}
                </h3>
                <div className="mt-2 flex flex-wrap gap-3 text-sm text-slate-600">
                  <span>Class: {item.class_name} ({item.class_code})</span>
                  {item.type_description ? (
                    <span>
                      Type: {item.type_description} ({item.type_code})
                    </span>
                  ) : null}
                  {item.annex ? <span>Annex: {item.annex}</span> : null}
                </div>
              </article>
            ))}
          </div>
        </section>
      </div>
    </div>
  );
}
|
||||||
11
frontend/src/index.css
Normal file
11
frontend/src/index.css
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
@tailwind base;
|
||||||
|
@tailwind components;
|
||||||
|
@tailwind utilities;
|
||||||
|
|
||||||
|
:root {
|
||||||
|
color-scheme: light;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
@apply bg-slate-50 text-slate-900;
|
||||||
|
}
|
||||||
10
frontend/src/main.tsx
Normal file
10
frontend/src/main.tsx
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
// Application entry point: mount the React tree into the #root element.
import React from "react";
import ReactDOM from "react-dom/client";
import App from "./App";
import "./index.css";

// Non-null assertion: index.html ships a <div id="root"> unconditionally.
ReactDOM.createRoot(document.getElementById("root")!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);
|
||||||
8
frontend/tailwind.config.js
Normal file
8
frontend/tailwind.config.js
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
/** @type {import('tailwindcss').Config} */
|
||||||
|
export default {
|
||||||
|
content: ["./index.html", "./src/**/*.{ts,tsx}"],
|
||||||
|
theme: {
|
||||||
|
extend: {},
|
||||||
|
},
|
||||||
|
plugins: [],
|
||||||
|
};
|
||||||
17
frontend/tsconfig.json
Normal file
17
frontend/tsconfig.json
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"useDefineForClassFields": true,
|
||||||
|
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
||||||
|
"module": "ESNext",
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"moduleResolution": "Bundler",
|
||||||
|
"allowImportingTsExtensions": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"noEmit": true,
|
||||||
|
"jsx": "react-jsx",
|
||||||
|
"strict": true
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
9
frontend/tsconfig.node.json
Normal file
9
frontend/tsconfig.node.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"composite": true,
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleResolution": "Bundler",
|
||||||
|
"allowSyntheticDefaultImports": true
|
||||||
|
},
|
||||||
|
"include": ["vite.config.ts"]
|
||||||
|
}
|
||||||
10
frontend/vite.config.ts
Normal file
10
frontend/vite.config.ts
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
// Vite configuration for the React frontend.
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";

export default defineConfig({
  plugins: [react()],
  server: {
    // Listen on all interfaces so the dev server is reachable from Docker.
    host: true,
    port: 5173,
  },
});
|
||||||
6
package.json
Normal file
6
package.json
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"name": "termsearch",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.1.0",
|
||||||
|
"packageManager": "pnpm@9.12.3"
|
||||||
|
}
|
||||||
3
pnpm-workspace.yaml
Normal file
3
pnpm-workspace.yaml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
packages:
|
||||||
|
- "frontend"
|
||||||
|
- "packages/*"
|
||||||
Reference in New Issue
Block a user