feat: add detailed employee publications storage and MCP docs
This commit is contained in:
@@ -333,7 +333,7 @@ def _load_widget_publications(
|
||||
items = _extract_publication_items(result)
|
||||
if not items:
|
||||
break
|
||||
publications.extend(_normalize_publication_item(item) for item in items)
|
||||
publications.extend(_normalize_publication_item(item, author_id) for item in items)
|
||||
|
||||
total = int(result.get("total") or 0)
|
||||
if not result.get("more") and len(publications) >= total:
|
||||
@@ -575,20 +575,37 @@ def _parse_vkr_items(nodes: list) -> list[str]:
|
||||
return [item for item in dict.fromkeys(items) if item]
|
||||
|
||||
|
||||
def _normalize_publication_item(item: dict, current_author_id: str | None = None) -> dict:
    """Flatten one raw publication record into the dict shape stored downstream.

    Args:
        item: Raw publication payload. Non-dict values under ``description``,
            ``documents`` and ``language`` are treated as empty dicts.
        current_author_id: Identifier forwarded to
            ``_normalize_publication_authors`` so it can flag the matching
            author entry.

    Returns:
        A dict with identifier, title, year, type, language, status, links,
        citation text, annotation, description, authors, the untouched
        ``raw_data`` payload and a combined ``text`` field.
    """
    publication_id = str(item.get("id") or "").strip()
    title = _html_to_text(item.get("title"))
    year = _int_or_none(item.get("year"))
    publication_type = str(item.get("type") or "").strip() or None

    description = item.get("description")
    if not isinstance(description, dict):
        description = {}
    documents = item.get("documents")
    if not isinstance(documents, dict):
        documents = {}
    language = item.get("language")
    if not isinstance(language, dict):
        language = {}

    short_description = _localized_value(description.get("short")) or _localized_value(description.get("shortLeft"))
    annotation = _localized_text_map(item.get("annotation"))
    authors = _normalize_publication_authors(item.get("authorsByType"), current_author_id)

    # "main" goes through the same helper as "short"/"shortLeft": a plain
    # string is handled exactly as before, while a localized mapping yields
    # its text instead of the repr of the dict.
    # NOTE(review): the upstream shape of description["main"] is not visible
    # here - confirm whether it is a string or a localized mapping.
    citation_text = _localized_value(description.get("main")) or _build_publication_citation(title, authors, year)

    # Free-text field: title, year and short description, skipping empty parts.
    text = normalize_ws(" ".join(part for part in [title, str(year or ""), short_description] if part))

    return {
        "id": publication_id or None,
        "publication_id": publication_id or None,
        "title": title or publication_id,
        "year": year,
        "type": publication_type,
        "publication_type": publication_type,
        "language": normalize_ws(language.get("name")) or None,
        "status": _int_or_none(item.get("status")),
        "url": f"https://publications.hse.ru/view/{publication_id}" if publication_id else None,
        "doi_url": _document_href(documents, "DOI"),
        "other_url": _document_href(documents, "OTHER_URL"),
        "document_url": _document_href(documents, "DOCUMENT"),
        "citation_text": citation_text or None,
        "annotation": annotation,
        "description": description or None,
        "authors": authors,
        "raw_data": item,
        "text": text or title or publication_id,
    }
|
||||
|
||||
@@ -685,12 +702,69 @@ def _html_to_text(value: object) -> str:
|
||||
return normalize_ws(BeautifulSoup(str(value or ""), "html.parser").get_text(" ", strip=True))
|
||||
|
||||
|
||||
def _localized_text_map(value: object) -> dict[str, str]:
    """Return the non-empty ``ru``/``en``/``publ`` entries of *value* as plain text.

    Anything that is not a dict yields an empty mapping. Each entry is passed
    through ``_html_to_text`` and kept only when the result is non-empty.
    """
    if isinstance(value, dict):
        return {
            locale: plain
            for locale in ("ru", "en", "publ")
            if (plain := _html_to_text(value.get(locale)))
        }
    return {}
|
||||
|
||||
|
||||
def _localized_value(value: object) -> str:
    """Return a single whitespace-normalized string for *value*.

    For a dict the first truthy entry among ``ru``, ``publ`` and ``en`` (in
    that order) is used; any other value is stringified, with falsy values
    becoming an empty string.
    """
    if not isinstance(value, dict):
        return normalize_ws(str(value or ""))
    preferred = value.get("ru") or value.get("publ") or value.get("en")
    return normalize_ws(preferred)
|
||||
|
||||
|
||||
def _normalize_publication_authors(value: object, current_author_id: str | None) -> list[dict]:
    """Convert the ``author`` list inside *value* into flat author records.

    Args:
        value: Expected to be a dict with an ``author`` list; anything else
            yields an empty list. Non-dict list entries are skipped.
        current_author_id: When truthy, the record whose normalized id equals
            it gets ``is_current_employee`` set to True.

    Returns:
        One dict per author with id, absolute href, localized titles,
        alternative names and the ``is_current_employee`` flag.
    """
    if not isinstance(value, dict):
        return []

    records = []
    for entry in value.get("author") or []:
        if isinstance(entry, dict):
            names = entry.get("title") if isinstance(entry.get("title"), dict) else {}
            reversed_names = entry.get("reverseTitle") if isinstance(entry.get("reverseTitle"), dict) else {}
            entry_id = normalize_ws(entry.get("id"))
            link = normalize_ws(entry.get("href"))
            # Relative links are resolved against the site root.
            absolute_link = urljoin("https://www.hse.ru", link) if link else None
            record = {
                "id": entry_id or None,
                "href": absolute_link,
                "title_ru": _html_to_text(names.get("ru")),
                "title_en": _html_to_text(names.get("en")),
                "reverse_title_ru": _html_to_text(reversed_names.get("ru")),
                "reverse_title_en": _html_to_text(reversed_names.get("en")),
                "alt_name": normalize_ws(entry.get("altName")) or None,
                "other_name": normalize_ws(entry.get("otherName")) or None,
                "is_current_employee": bool(current_author_id and entry_id == current_author_id),
            }
            records.append(record)
    return records
|
||||
|
||||
|
||||
def _document_href(documents: dict, key: str) -> str | None:
    """Return the normalized ``href`` of ``documents[key]``, or None.

    None is returned when the entry is missing, is not a dict, or its
    normalized ``href`` is empty.
    """
    entry = documents.get(key)
    if isinstance(entry, dict):
        return normalize_ws(entry.get("href")) or None
    return None
|
||||
|
||||
|
||||
def _build_publication_citation(title: str, authors: list[dict], year: int | None) -> str:
    """Assemble a fallback citation of the form "authors. title. year".

    Each author contributes the first truthy value among ``title_ru``,
    ``title_en`` and ``alt_name``; authors without any name and empty
    citation parts are left out.
    """
    names = []
    for person in authors:
        names.append(person.get("title_ru") or person.get("title_en") or person.get("alt_name"))
    byline = ", ".join(filter(None, names))
    year_text = str(year or "")
    segments = [segment for segment in (byline, title, year_text) if segment]
    return normalize_ws(". ".join(segments))
|
||||
|
||||
|
||||
def _int_or_none(value: object) -> int | None:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _slugify(value: str) -> str:
|
||||
cleaned = re.sub(r"[^\w\s-]", "", value.lower(), flags=re.UNICODE)
|
||||
return re.sub(r"[-\s]+", "_", cleaned).strip("_") or "section"
|
||||
|
||||
Reference in New Issue
Block a user