feat: add employee news links parsing and storage
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, EmployeeNewsLink
|
||||
from app.services.admin_data import (
|
||||
employee_detail_payload,
|
||||
employee_display_payload,
|
||||
@@ -35,6 +35,7 @@ def test_employee_display_payload_extracts_common_fields(db_session):
|
||||
"sections": [
|
||||
{"type": "publications", "publications": [{"title": "Paper"}]},
|
||||
{"type": "courses_by_year", "courses": [{"title": "Course"}]},
|
||||
{"type": "news", "news_links": [{"title": "News", "url": "https://example.test/news"}]},
|
||||
],
|
||||
},
|
||||
)
|
||||
@@ -46,6 +47,7 @@ def test_employee_display_payload_extracts_common_fields(db_session):
|
||||
assert payload["email_text"] == "person@hse.ru"
|
||||
assert payload["publications_count"] == 1
|
||||
assert payload["courses_count"] == 1
|
||||
assert payload["news_count"] == 1
|
||||
assert payload["first_seen_display"] != "Не указано"
|
||||
|
||||
|
||||
@@ -104,6 +106,19 @@ def test_employee_detail_payload_normalizes_human_readable_sections(db_session):
|
||||
"type": "generic",
|
||||
"raw_text": "Fallback text",
|
||||
},
|
||||
{
|
||||
"title": "В новостях",
|
||||
"type": "news",
|
||||
"news_links": [
|
||||
{
|
||||
"title": "News title",
|
||||
"url": "https://example.test/news",
|
||||
"summary": "News summary",
|
||||
"published_at": "2026-04-28T00:00:00+00:00",
|
||||
"published_year": 2026,
|
||||
}
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
@@ -118,6 +133,41 @@ def test_employee_detail_payload_normalizes_human_readable_sections(db_session):
|
||||
assert payload["sections"][2]["courses"][0]["title"] == "Course"
|
||||
assert payload["sections"][3]["theses"][0]["student"] == "Student Name"
|
||||
assert payload["sections"][4]["paragraphs"] == ["Fallback text"]
|
||||
assert payload["sections"][5]["news_links"][0]["title"] == "News title"
|
||||
assert payload["news_links"][0]["published_display"] == "28.04.2026"
|
||||
|
||||
|
||||
def test_employee_payload_prefers_stored_news_links(db_session):
    """Check that a stored EmployeeNewsLink row is what the payloads report.

    The employee's ``current_data`` carries an "Old news" entry, while a
    separate ``EmployeeNewsLink`` row ("Stored news") is persisted for the
    same employee. The detail payload is expected to expose the stored row,
    including its formatted publication date.
    """
    employee = Employee(
        profile_key="staff:news",
        canonical_url="https://www.hse.ru/staff/news",
        full_name="News Person",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={
            "sections": [
                {"type": "news", "news_links": [{"title": "Old news"}]},
            ],
        },
    )
    db_session.add(employee)
    # Commit before building the link: the link row reads employee.id below.
    db_session.commit()

    stored_link = EmployeeNewsLink(
        employee_id=employee.id,
        title="Stored news",
        url="https://example.test/stored",
        summary="Stored summary",
        published_at=datetime(2026, 4, 28, tzinfo=timezone.utc),
        published_year=2026,
        source_hash="b" * 64,
    )
    db_session.add(stored_link)
    db_session.commit()

    display_payload = employee_display_payload(employee)
    detail_payload = employee_detail_payload(employee)

    assert display_payload["news_count"] == 1

    first_link = detail_payload["news_links"][0]
    assert first_link["title"] == "Stored news"
    assert first_link["published_display"] == "28.04.2026"
|
||||
|
||||
|
||||
def test_employee_payloads_tolerate_malformed_current_data(db_session):
|
||||
|
||||
Reference in New Issue
Block a user