Files
miem_workers/tests/test_admin_data.py

302 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from datetime import datetime, timezone
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, EmployeeNewsLink
from app.services.admin_data import (
employee_detail_payload,
employee_display_payload,
format_admin_datetime,
list_employees_page,
run_detail_payload,
run_payload,
stats_payload,
)
def test_format_admin_datetime_handles_datetime_string_and_none():
    """Check ``format_admin_datetime`` for a datetime, an ISO string and ``None``.

    Both the aware datetime and the ISO-8601 string describe 17:13 UTC and are
    expected to render as ``20:13`` (three hours ahead of UTC) in
    ``DD.MM.YYYY HH:MM`` form; ``None`` is expected to yield the placeholder.
    """
    rendered = "28.04.2026 20:13"
    aware_utc = datetime(2026, 4, 28, 17, 13, 34, tzinfo=timezone.utc)
    iso_text = "2026-04-28T17:13:34.448605+00:00"

    assert format_admin_datetime(aware_utc) == rendered
    assert format_admin_datetime(iso_text) == rendered
    assert format_admin_datetime(None) == "Не указано"
def test_employee_display_payload_extracts_common_fields(db_session):
    """Check the flat display fields derived from an employee's ``current_data``.

    The profile carries one position, one e-mail and a single item in each of
    the publications, courses and news sections; the display payload is
    expected to expose them as text fields and per-section counters.

    NOTE(review): ``db_session`` is requested but never referenced in the body;
    presumably it is kept for database/mapper setup - confirm before removing.
    """
    profile_contacts = {"emails": ["person@hse.ru"], "phones": ["+79990000000"], "address": "Moscow"}
    profile_sections = [
        {"type": "publications", "publications": [{"title": "Paper"}]},
        {"type": "courses_by_year", "courses": [{"title": "Course"}]},
        {"type": "news", "news_links": [{"title": "News", "url": "https://example.test/news"}]},
    ]
    person = Employee(
        profile_key="staff:person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": profile_contacts,
            "sections": profile_sections,
        },
    )

    result = employee_display_payload(person)

    # Text fields.
    assert result["positions_text"] == "Professor"
    assert result["status_display"] == "Работает"
    assert result["email_text"] == "person@hse.ru"
    # One entry was supplied per counted section.
    assert result["publications_count"] == 1
    assert result["courses_count"] == 1
    assert result["news_count"] == 1
    # A real timestamp was supplied, so the placeholder must not be shown.
    assert result["first_seen_display"] != "Не указано"
def test_employee_detail_payload_normalizes_human_readable_sections(db_session):
    """Check that the detail payload keeps every section type readable.

    One section of each kind used here (year blocks, publications, courses,
    graduation theses, generic fallback, news) is placed in ``current_data``;
    the assertions then address the resulting sections by the same position.

    NOTE(review): ``db_session`` is requested but never referenced in the body;
    presumably it is kept for database/mapper setup - confirm before removing.
    """
    education_section = {
        "title": "Education",
        "type": "year_blocks",
        "year_entries": [{"year": 2020, "text": "Master degree"}],
    }
    publications_section = {
        "title": "Publications",
        "type": "publications",
        "publications": [{"title": "Paper", "text": "Paper details", "url": "https://example.test/paper"}],
    }
    courses_section = {
        "title": "Courses",
        "type": "courses_by_year",
        "academic_year": "2025/2026",
        "courses": [{"title": "Course", "url": "https://example.test/course"}],
    }
    theses_section = {
        "title": "ВКР",
        "type": "graduation_theses",
        "theses_count": 1,
        "theses": [
            {
                "student": "Student Name",
                "title": "Thesis title",
                "defense_year": 2025,
                "project_url": "https://www.hse.ru/edu/vkr/1",
            }
        ],
    }
    fallback_section = {
        "title": "Fallback",
        "type": "generic",
        "raw_text": "Fallback text",
    }
    news_section = {
        "title": "В новостях",
        "type": "news",
        "news_links": [
            {
                "title": "News title",
                "url": "https://example.test/news",
                "summary": "News summary",
                "published_at": "2026-04-28T00:00:00+00:00",
                "published_year": 2026,
            }
        ],
    }
    contacts = {
        "emails": ["person@hse.ru"],
        "phones": ["+79990000000"],
        "address": "Moscow",
        "items": [{"raw": "consultation hours"}],
    }

    person = Employee(
        profile_key="staff:person",
        profile_type="staff",
        profile_id="person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": contacts,
            "external_ids": [{"system": "ORCID", "value": "0000", "url": "https://orcid.org/0000"}],
            # The order below fixes the indices used in the section assertions.
            "sections": [
                education_section,
                publications_section,
                courses_section,
                theses_section,
                fallback_section,
                news_section,
            ],
        },
    )

    detail = employee_detail_payload(person)

    # Contacts: the raw "items" entries are expected under "contact_items" as plain strings.
    assert detail["contacts"]["emails"] == ["person@hse.ru"]
    assert detail["contacts"]["contact_items"] == ["consultation hours"]
    assert detail["external_ids"][0]["system"] == "ORCID"

    # Sections, in the order they were supplied.
    sections = detail["sections"]
    assert sections[0]["year_entries"][0]["text"] == "Master degree"
    assert sections[1]["publications"][0]["title"] == "Paper"
    assert sections[2]["courses"][0]["title"] == "Course"
    assert sections[3]["theses"][0]["student"] == "Student Name"
    # The generic section's raw text is expected as a list of paragraphs.
    assert sections[4]["paragraphs"] == ["Fallback text"]
    assert sections[5]["news_links"][0]["title"] == "News title"

    # Top-level news links carry a date-only display value.
    assert detail["news_links"][0]["published_display"] == "28.04.2026"
def test_employee_payload_prefers_stored_news_links(db_session):
    """Check that a persisted ``EmployeeNewsLink`` wins over ``current_data`` news.

    The employee's ``current_data`` contains an "Old news" link, while a
    separate "Stored news" row is saved for the same employee; both payloads
    are expected to reflect the stored row.
    """
    person = Employee(
        profile_key="staff:news",
        canonical_url="https://www.hse.ru/staff/news",
        full_name="News Person",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"sections": [{"type": "news", "news_links": [{"title": "Old news"}]}]},
    )
    db_session.add(person)
    # Committed before the link is built because the link reads person.id.
    db_session.commit()

    stored_link = EmployeeNewsLink(
        employee_id=person.id,
        title="Stored news",
        url="https://example.test/stored",
        summary="Stored summary",
        published_at=datetime(2026, 4, 28, tzinfo=timezone.utc),
        published_year=2026,
        source_hash="b" * 64,
    )
    db_session.add(stored_link)
    db_session.commit()

    display_result = employee_display_payload(person)
    detail_result = employee_detail_payload(person)

    assert display_result["news_count"] == 1
    first_link = detail_result["news_links"][0]
    assert first_link["title"] == "Stored news"
    assert detail_result["news_links"][0]["published_display"] == "28.04.2026"
def test_employee_payloads_tolerate_malformed_current_data(db_session):
    """Check that both payload builders cope with a non-dict ``current_data``.

    ``current_data`` is set to a plain string; the builders are expected to
    return empty collections / empty text instead of raising.

    NOTE(review): ``db_session`` is requested but never referenced in the body;
    presumably it is kept for database/mapper setup - confirm before removing.
    """
    broken = Employee(
        profile_key="staff:broken",
        canonical_url="https://www.hse.ru/staff/broken",
        full_name="Broken Data",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data="not-a-dict",
    )

    display_result = employee_display_payload(broken)
    detail_result = employee_detail_payload(broken)

    # Display payload falls back to empty values.
    assert display_result["positions"] == []
    assert display_result["email_text"] == ""

    # Detail payload falls back to empty collections as well.
    detail_contacts = detail_result["contacts"]
    assert detail_contacts["emails"] == []
    assert detail_result["contacts"]["contact_items"] == []
    assert detail_result["sections"] == []
def test_list_employees_page_filters_sorts_and_paginates(db_session):
    """Check status filtering and the paging metadata of ``list_employees_page``.

    Two employees are stored, one dismissed ("Beta") and one active ("Alpha");
    listing with ``status="active"`` is expected to return only "Alpha".
    """
    dismissed = Employee(
        profile_key="staff:b",
        canonical_url="https://www.hse.ru/staff/b",
        full_name="Beta",
        status="dismissed",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": []}},
    )
    active = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": ["alpha@hse.ru"]}},
    )
    db_session.add_all([dismissed, active])
    db_session.commit()

    listing = list_employees_page(
        db_session,
        status="active",
        sort="full_name",
        direction="asc",
        limit=10,
    )

    assert listing["total"] == 1
    first_row = listing["employees"][0]
    assert first_row["full_name"] == "Alpha"
    # NOTE(review): limit=10 is requested yet 50 is expected back - presumably the
    # service replaces unsupported page sizes with a default; confirm against
    # list_employees_page.
    assert listing["limit"] == 50
def test_stats_payload_uses_latest_run_new_count(db_session):
    """Check the counters returned by ``stats_payload``.

    One active employee and one completed crawl run with ``new_count=3`` are
    stored; the stats are expected to report one employee in total, one active
    employee and three new employees in the last run.
    """
    only_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    finished_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=3)
    db_session.add_all([only_employee, finished_run])
    db_session.commit()

    stats = stats_payload(db_session)

    assert stats["total"] == 1
    assert stats["active"] == 1
    assert stats["new_in_last_run"] == 3
def test_run_payload_calculates_progress():
    """Check the derived progress fields of ``run_payload`` for a running crawl.

    The expected processed count of 7 is consistent with
    parsed + skipped + errors (4 + 2 + 1), and 70.0 percent is consistent with
    7 processed out of 10 found.
    """
    counters = {
        "found_count": 10,
        "parsed_count": 4,
        "skipped_count": 2,
        "error_count": 1,
    }
    running = CrawlRun(source_url="https://miem.hse.ru/persons", status="running", **counters)

    result = run_payload(running)

    assert result["processed_count"] == 7
    assert result["progress_percent"] == 70.0
    assert result["status_display"] == "Выполняется"
def test_run_detail_payload_groups_changes_and_handles_old_runs(db_session):
    """Check ``run_detail_payload`` for a run with change rows and one without.

    ``run`` gets one "new" employee change and one crawl error attached, while
    ``old_run`` has neither. The detailed run is expected to list both and flag
    change details as available; the old run is expected to report them as
    unavailable with an empty "new" group.
    """
    old_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed")
    run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=1)
    newcomer = Employee(
        profile_key="staff:new",
        canonical_url="https://www.hse.ru/staff/new",
        full_name="New Person",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    for entity in (old_run, run, newcomer):
        db_session.add(entity)
    # Committed before the child rows are built because they read run.id / newcomer.id.
    db_session.commit()

    change_row = CrawlRunEmployeeChange(
        crawl_run_id=run.id,
        employee_id=newcomer.id,
        profile_key=newcomer.profile_key,
        profile_url=newcomer.canonical_url,
        full_name=newcomer.full_name,
        change_type="new",
        profile_available=True,
        message="added",
    )
    db_session.add(change_row)
    error_row = CrawlError(
        crawl_run_id=run.id,
        profile_url=newcomer.canonical_url,
        error_type="ValueError",
        message="bad",
    )
    db_session.add(error_row)
    db_session.commit()

    detailed = run_detail_payload(db_session, run)
    legacy = run_detail_payload(db_session, old_run)

    # Run with recorded changes and errors.
    assert detailed["changes_detail_available"] is True
    assert detailed["changes"]["new"][0]["full_name"] == "New Person"
    assert detailed["errors"][0]["error_type"] == "ValueError"

    # Run without any recorded change rows.
    assert legacy["changes_detail_available"] is False
    assert legacy["changes"]["new"] == []