feat: add employee news links parsing and storage
This commit is contained in:
@@ -8,7 +8,7 @@ from zoneinfo import ZoneInfo
|
||||
from sqlalchemy import Select, Text, and_, desc, func, or_, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, EmployeeNewsLink
|
||||
|
||||
EMPLOYEE_SORTS = {
|
||||
"full_name": Employee.full_name,
|
||||
@@ -24,6 +24,7 @@ def employee_display_payload(employee: Employee) -> dict[str, Any]:
|
||||
data = _as_dict(employee.current_data)
|
||||
contacts = _as_dict(data.get("contacts"))
|
||||
sections = _as_list(data.get("sections"))
|
||||
stored_news_links = _stored_news_links(employee)
|
||||
positions = _clean_list(data.get("positions"))
|
||||
emails = _clean_list(contacts.get("emails"))
|
||||
phones = _clean_list(contacts.get("phones"))
|
||||
@@ -43,6 +44,7 @@ def employee_display_payload(employee: Employee) -> dict[str, Any]:
|
||||
"address": contacts.get("address"),
|
||||
"publications_count": _count_section_items(sections, "publications"),
|
||||
"courses_count": _count_section_items(sections, "courses_by_year"),
|
||||
"news_count": len(stored_news_links) or _count_section_items(sections, "news"),
|
||||
"first_seen_at": employee.first_seen_at.isoformat() if employee.first_seen_at else None,
|
||||
"last_seen_at": employee.last_seen_at.isoformat() if employee.last_seen_at else None,
|
||||
"dismissed_at": employee.dismissed_at.isoformat() if employee.dismissed_at else None,
|
||||
@@ -67,6 +69,7 @@ def employee_detail_payload(employee: Employee) -> dict[str, Any]:
|
||||
"contact_items": _normalize_contact_items(contacts.get("items")),
|
||||
},
|
||||
"external_ids": _normalize_external_ids(data.get("external_ids")),
|
||||
"news_links": _detail_news_links(employee, data),
|
||||
"sections": [_normalize_section(section) for section in _as_list(data.get("sections"))],
|
||||
}
|
||||
|
||||
@@ -276,6 +279,8 @@ def _count_section_items(sections: list[dict[str, Any]], section_type: str) -> i
|
||||
total += len(section.get("publications") or section.get("items") or [])
|
||||
elif section_type == "courses_by_year":
|
||||
total += len(section.get("courses") or [])
|
||||
elif section_type == "news":
|
||||
total += len(section.get("news_links") or section.get("items") or [])
|
||||
return total
|
||||
|
||||
|
||||
@@ -348,6 +353,8 @@ def _normalize_section(section: Any) -> dict[str, Any]:
|
||||
"year_entries": _normalize_year_entries(section.get("year_entries")),
|
||||
"publications": _normalize_publications(section.get("publications")),
|
||||
"publications_count": section.get("publications_count"),
|
||||
"news_links": _normalize_news_links(section.get("news_links")),
|
||||
"news_count": section.get("news_count"),
|
||||
"theses": _normalize_theses(section.get("theses")),
|
||||
"theses_count": section.get("theses_count"),
|
||||
"academic_year": section.get("academic_year"),
|
||||
@@ -370,6 +377,77 @@ def _normalize_links(items: Any) -> list[dict[str, str | None]]:
|
||||
return normalized
|
||||
|
||||
|
||||
def _stored_news_links(employee: Employee) -> list[dict[str, Any]]:
    """Return payload dicts for the news links attached to ``employee``.

    The links in ``employee.news_links`` are ordered with
    ``_news_link_sort_key`` and each one is then converted with
    ``_stored_news_link_payload``.
    """
    ordered_links = list(employee.news_links)
    ordered_links.sort(key=_news_link_sort_key)
    return list(map(_stored_news_link_payload, ordered_links))
|
||||
|
||||
|
||||
def _news_link_sort_key(item: EmployeeNewsLink) -> tuple:
    """Build the ordering key for a stored news link.

    The key is ``(-timestamp, title, id)``. The publication timestamp is
    negated so that, under an ascending sort, larger timestamps come first.
    A link without ``published_at`` uses ``0`` in that slot, and a missing
    title is compared as ``""``.
    """
    published = item.published_at
    if published:
        negated_timestamp = -published.timestamp()
    else:
        negated_timestamp = 0
    return (negated_timestamp, item.title or "", item.id)
|
||||
|
||||
|
||||
def _stored_news_link_payload(item: EmployeeNewsLink) -> dict[str, Any]:
    """Convert a stored news link into a plain dict payload.

    ``published_at`` is emitted via ``isoformat()``, or as ``None`` when it
    is not set. ``published_display`` is the ``format_admin_date`` rendering
    of ``published_at`` when it is set; otherwise it is ``published_year``
    as a string, or ``""`` when the year is falsy as well.
    """
    published = item.published_at
    if published:
        published_iso = published.isoformat()
        display = format_admin_date(published)
    else:
        published_iso = None
        display = str(item.published_year or "")

    return {
        "title": item.title,
        "url": item.url,
        "summary": item.summary,
        "published_at": published_iso,
        "published_year": item.published_year,
        "published_display": display,
    }
|
||||
|
||||
|
||||
def _detail_news_links(employee: Employee, data: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the news links for an employee, preferring stored ones.

    When ``_stored_news_links(employee)`` is non-empty it is returned as is.
    Otherwise the first dict in ``data["sections"]`` whose ``type`` equals
    ``"news"`` is located and its ``news_links`` are normalized with
    ``_normalize_news_links``. An empty list is returned when there is no
    such section.
    """
    stored_links = _stored_news_links(employee)
    if stored_links:
        return stored_links

    # Only the first matching section is consulted; later ones are ignored.
    first_news_section = next(
        (
            candidate
            for candidate in _as_list(data.get("sections"))
            if isinstance(candidate, dict) and candidate.get("type") == "news"
        ),
        None,
    )
    if first_news_section is None:
        return []
    return _normalize_news_links(first_news_section.get("news_links"))
|
||||
|
||||
|
||||
def format_admin_date(value: Any) -> str:
    """Render a date-like value as a ``DD.MM.YYYY`` string.

    Falsy input yields ``""``. A string is parsed with
    ``datetime.fromisoformat`` after every ``"Z"`` is rewritten to
    ``"+00:00"``; a string that fails to parse is returned unchanged. Any
    other value that is not a ``datetime`` is returned as ``str(value)``.
    A datetime carrying ``tzinfo`` is converted to the Europe/Moscow zone
    before formatting; a naive datetime is formatted as it is.
    """
    if not value:
        return ""

    moment = value
    if isinstance(moment, str):
        iso_text = moment.replace("Z", "+00:00")
        try:
            moment = datetime.fromisoformat(iso_text)
        except ValueError:
            # Not an ISO date: hand the caller's text back untouched.
            return value

    if isinstance(moment, datetime):
        if moment.tzinfo:
            moment = moment.astimezone(ZoneInfo("Europe/Moscow"))
        return moment.strftime("%d.%m.%Y")
    return str(moment)
|
||||
|
||||
|
||||
def _normalize_news_links(items: Any) -> list[dict[str, Any]]:
|
||||
normalized = []
|
||||
if not isinstance(items, list):
|
||||
return normalized
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get("title") or item.get("url") or "").strip()
|
||||
url = str(item.get("url") or "").strip()
|
||||
summary = str(item.get("summary") or "").strip()
|
||||
published_at = str(item.get("published_at") or "").strip()
|
||||
published_year = item.get("published_year")
|
||||
if title or url:
|
||||
normalized.append(
|
||||
{
|
||||
"title": title or url,
|
||||
"url": url or None,
|
||||
"summary": summary or None,
|
||||
"published_at": published_at or None,
|
||||
"published_year": published_year,
|
||||
"published_display": format_admin_date(published_at) if published_at else str(published_year or ""),
|
||||
}
|
||||
)
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_year_entries(items: Any) -> list[dict[str, Any]]:
|
||||
normalized = []
|
||||
if not isinstance(items, list):
|
||||
|
||||
Reference in New Issue
Block a user