fix: enrich HSE profile parsing with publications and theses

This commit is contained in:
Anton
2026-04-29 14:15:29 +03:00
parent cf578ce699
commit cc9481fc6c
15 changed files with 418 additions and 11 deletions

View File

@@ -112,7 +112,7 @@ def list_employees_page(
limit: int = 50,
offset: int = 0,
) -> dict[str, Any]:
limit = max(1, min(limit, 200))
limit = limit if limit in {25, 50, 100} else 50
offset = max(0, offset)
base_stmt = build_employee_query(
status=status,
@@ -281,6 +281,8 @@ def _normalize_section(section: Any) -> dict[str, Any]:
"year_entries": _normalize_year_entries(section.get("year_entries")),
"publications": _normalize_publications(section.get("publications")),
"publications_count": section.get("publications_count"),
"theses": _normalize_theses(section.get("theses")),
"theses_count": section.get("theses_count"),
"academic_year": section.get("academic_year"),
"courses": _normalize_courses(section.get("courses")),
"table": _normalize_table(section.get("table")),
@@ -349,6 +351,35 @@ def _normalize_courses(items: Any) -> list[dict[str, str | None]]:
return normalized
def _normalize_theses(items: Any) -> list[dict[str, Any]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
continue
title = str(item.get("title") or "").strip()
student = str(item.get("student") or "").strip()
if not title and not student:
continue
normalized.append(
{
"id": item.get("id"),
"student": student,
"title": title,
"defense_year": item.get("defense_year") or item.get("year"),
"level": str(item.get("level") or "").strip(),
"rating": item.get("rating"),
"project_url": str(item.get("project_url") or "").strip() or None,
"program": str(item.get("program") or "").strip(),
"program_url": str(item.get("program_url") or "").strip() or None,
"org_unit": str(item.get("org_unit") or "").strip(),
"org_unit_url": str(item.get("org_unit_url") or "").strip() or None,
}
)
return normalized
def _normalize_table(table: Any) -> dict[str, Any] | None:
if not isinstance(table, dict):
return None