"""Tests for the payload builders exposed by ``app.services.admin_data``."""

from datetime import datetime, timezone

from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee
from app.services.admin_data import (
    employee_detail_payload,
    employee_display_payload,
    format_admin_datetime,
    list_employees_page,
    run_detail_payload,
    run_payload,
    stats_payload,
)


def _seen_timestamps():
    """Return ``first_seen_at`` / ``last_seen_at`` keyword arguments set to the current UTC time.

    Each timestamp comes from its own ``datetime.now`` call, first-seen before
    last-seen, so the two values are taken independently.
    """
    return {
        "first_seen_at": datetime.now(timezone.utc),
        "last_seen_at": datetime.now(timezone.utc),
    }


def test_format_admin_datetime_handles_datetime_string_and_none():
    """An aware datetime and an ISO string format identically; ``None`` yields the placeholder."""
    # 17:13 UTC is expected to be rendered as 20:13.
    # NOTE(review): presumably a fixed UTC+3 display zone - confirm in the service.
    expected_text = "28.04.2026 20:13"
    aware_value = datetime(2026, 4, 28, 17, 13, 34, tzinfo=timezone.utc)
    iso_value = "2026-04-28T17:13:34.448605+00:00"

    for raw in (aware_value, iso_value):
        assert format_admin_datetime(raw) == expected_text

    assert format_admin_datetime(None) == "Не указано"


def test_employee_display_payload_extracts_common_fields(db_session):
    """The display payload exposes positions, status, e-mail and per-section counters."""
    contact_info = {
        "emails": ["person@hse.ru"],
        "phones": ["+79990000000"],
        "address": "Moscow",
    }
    profile_sections = [
        {"type": "publications", "publications": [{"title": "Paper"}]},
        {"type": "courses_by_year", "courses": [{"title": "Course"}]},
    ]
    employee = Employee(
        profile_key="staff:person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        **_seen_timestamps(),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": contact_info,
            "sections": profile_sections,
        },
    )

    payload = employee_display_payload(employee)

    expected_fields = {
        "positions_text": "Professor",
        "status_display": "Работает",
        "email_text": "person@hse.ru",
        "publications_count": 1,
        "courses_count": 1,
    }
    for field_name, expected_value in expected_fields.items():
        assert payload[field_name] == expected_value
    # A real timestamp was supplied, so the "not specified" placeholder must not appear.
    assert payload["first_seen_display"] != "Не указано"


def test_employee_detail_payload_normalizes_human_readable_sections(db_session):
    """Each supported section type is surfaced under its own key in the detail payload."""
    education_section = {
        "title": "Education",
        "type": "year_blocks",
        "year_entries": [{"year": 2020, "text": "Master degree"}],
    }
    publications_section = {
        "title": "Publications",
        "type": "publications",
        "publications": [
            {"title": "Paper", "text": "Paper details", "url": "https://example.test/paper"}
        ],
    }
    courses_section = {
        "title": "Courses",
        "type": "courses_by_year",
        "academic_year": "2025/2026",
        "courses": [{"title": "Course", "url": "https://example.test/course"}],
    }
    theses_section = {
        "title": "ВКР",
        "type": "graduation_theses",
        "theses_count": 1,
        "theses": [
            {
                "student": "Student Name",
                "title": "Thesis title",
                "defense_year": 2025,
                "project_url": "https://www.hse.ru/edu/vkr/1",
            }
        ],
    }
    generic_section = {
        "title": "Fallback",
        "type": "generic",
        "raw_text": "Fallback text",
    }
    employee = Employee(
        profile_key="staff:person",
        profile_type="staff",
        profile_id="person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        **_seen_timestamps(),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": {
                "emails": ["person@hse.ru"],
                "phones": ["+79990000000"],
                "address": "Moscow",
                "items": [{"raw": "consultation hours"}],
            },
            "external_ids": [
                {"system": "ORCID", "value": "0000", "url": "https://orcid.org/0000"}
            ],
            "sections": [
                education_section,
                publications_section,
                courses_section,
                theses_section,
                generic_section,
            ],
        },
    )

    payload = employee_detail_payload(employee)

    contacts = payload["contacts"]
    assert contacts["emails"] == ["person@hse.ru"]
    # The raw contact "items" are expected back as a flat list of strings.
    assert contacts["contact_items"] == ["consultation hours"]
    assert payload["external_ids"][0]["system"] == "ORCID"

    # Sections are checked positionally, in the order they were supplied above.
    education, publications, courses, theses, fallback = (
        payload["sections"][index] for index in range(5)
    )
    assert education["year_entries"][0]["text"] == "Master degree"
    assert publications["publications"][0]["title"] == "Paper"
    assert courses["courses"][0]["title"] == "Course"
    assert theses["theses"][0]["student"] == "Student Name"
    assert fallback["paragraphs"] == ["Fallback text"]


def test_employee_payloads_tolerate_malformed_current_data(db_session):
    """A non-dict ``current_data`` yields empty collections instead of an error."""
    employee = Employee(
        profile_key="staff:broken",
        canonical_url="https://www.hse.ru/staff/broken",
        full_name="Broken Data",
        status="active",
        **_seen_timestamps(),
        current_data="not-a-dict",
    )

    display = employee_display_payload(employee)
    detail = employee_detail_payload(employee)

    assert display["positions"] == []
    assert display["email_text"] == ""

    detail_contacts = detail["contacts"]
    assert detail_contacts["emails"] == []
    assert detail_contacts["contact_items"] == []
    assert detail["sections"] == []


def test_list_employees_page_filters_sorts_and_paginates(db_session):
    """Only employees matching the status filter are counted and returned."""
    dismissed_employee = Employee(
        profile_key="staff:b",
        canonical_url="https://www.hse.ru/staff/b",
        full_name="Beta",
        status="dismissed",
        **_seen_timestamps(),
        current_data={"contacts": {"emails": []}},
    )
    active_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        **_seen_timestamps(),
        current_data={"contacts": {"emails": ["alpha@hse.ru"]}},
    )
    db_session.add_all([dismissed_employee, active_employee])
    db_session.commit()

    page = list_employees_page(
        db_session,
        status="active",
        sort="full_name",
        direction="asc",
        limit=10,
    )

    assert page["total"] == 1
    first_row = page["employees"][0]
    assert first_row["full_name"] == "Alpha"
    # A requested limit of 10 is reported back as 50.
    # NOTE(review): presumably unsupported page sizes fall back to a default - confirm.
    assert page["limit"] == 50


def test_stats_payload_uses_latest_run_new_count(db_session):
    """Stats report employee totals plus the ``new_count`` of the stored crawl run."""
    active_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        **_seen_timestamps(),
    )
    db_session.add(active_employee)
    finished_run = CrawlRun(
        source_url="https://miem.hse.ru/persons",
        status="completed",
        new_count=3,
    )
    db_session.add(finished_run)
    db_session.commit()

    payload = stats_payload(db_session)

    expected_stats = {"total": 1, "active": 1, "new_in_last_run": 3}
    for stat_name, expected_value in expected_stats.items():
        assert payload[stat_name] == expected_value


def test_run_payload_calculates_progress():
    """Parsed, skipped and errored profiles all count as processed for progress."""
    run = CrawlRun(
        source_url="https://miem.hse.ru/persons",
        status="running",
        found_count=10,
        parsed_count=4,
        skipped_count=2,
        error_count=1,
    )

    payload = run_payload(run)

    # 4 parsed + 2 skipped + 1 error = 7 of the 10 found profiles.
    assert payload["processed_count"] == 7
    assert payload["progress_percent"] == 70.0
    assert payload["status_display"] == "Выполняется"


def test_run_detail_payload_groups_changes_and_handles_old_runs(db_session):
    """A run with recorded changes exposes them grouped by type; a run without any does not."""
    source = "https://miem.hse.ru/persons"
    old_run = CrawlRun(source_url=source, status="completed")
    run = CrawlRun(source_url=source, status="completed", new_count=1)
    employee = Employee(
        profile_key="staff:new",
        canonical_url="https://www.hse.ru/staff/new",
        full_name="New Person",
        status="active",
        **_seen_timestamps(),
    )
    db_session.add_all([old_run, run, employee])
    # Commit first so that ``run.id`` and ``employee.id`` are populated below.
    db_session.commit()

    change = CrawlRunEmployeeChange(
        crawl_run_id=run.id,
        employee_id=employee.id,
        profile_key=employee.profile_key,
        profile_url=employee.canonical_url,
        full_name=employee.full_name,
        change_type="new",
        profile_available=True,
        message="added",
    )
    db_session.add(change)
    error = CrawlError(
        crawl_run_id=run.id,
        profile_url=employee.canonical_url,
        error_type="ValueError",
        message="bad",
    )
    db_session.add(error)
    db_session.commit()

    payload = run_detail_payload(db_session, run)
    old_payload = run_detail_payload(db_session, old_run)

    assert payload["changes_detail_available"] is True
    assert payload["changes"]["new"][0]["full_name"] == "New Person"
    assert payload["errors"][0]["error_type"] == "ValueError"

    # The older run has no change rows attached to it.
    assert old_payload["changes_detail_available"] is False
    assert old_payload["changes"]["new"] == []