99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
from datetime import datetime, timezone
|
|
|
|
from app.models import CrawlRun, Employee
|
|
from app.services.admin_data import employee_display_payload, list_employees_page, run_payload, stats_payload
|
|
|
|
|
|
def test_employee_display_payload_extracts_common_fields(db_session):
    """Verify the flattened display fields built from an employee's ``current_data``.

    The employee is constructed directly and never added to the session; the
    ``db_session`` fixture is requested but not referenced in the body.
    The fixture data holds one position, one email, one publication and one
    course, and the assertions check the matching text/count fields.
    """
    contact_info = {"emails": ["person@hse.ru"], "phones": ["+79990000000"], "address": "Moscow"}
    profile_sections = [
        {"type": "publications", "publications": [{"title": "Paper"}]},
        {"type": "courses_by_year", "courses": [{"title": "Course"}]},
    ]
    person = Employee(
        profile_key="staff:person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": contact_info,
            "sections": profile_sections,
        },
    )

    display = employee_display_payload(person)

    # Text fields first, then the per-section counters.
    assert display["positions_text"] == "Professor"
    assert display["email_text"] == "person@hse.ru"
    assert display["publications_count"] == 1
    assert display["courses_count"] == 1
|
|
|
|
|
|
def test_list_employees_page_filters_sorts_and_paginates(db_session):
    """Verify that listing with ``status="active"`` returns only the active employee.

    Two employees are stored: "Beta" (dismissed, no emails) and "Alpha"
    (active, one email). The page is requested sorted by ``full_name``
    ascending with a limit of 10, and must report a total of 1 with "Alpha"
    as its first item.
    """
    dismissed_employee = Employee(
        profile_key="staff:b",
        canonical_url="https://www.hse.ru/staff/b",
        full_name="Beta",
        status="dismissed",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": []}},
    )
    active_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": ["alpha@hse.ru"]}},
    )
    # Keep the insert order: the dismissed record goes in before the active one.
    for record in (dismissed_employee, active_employee):
        db_session.add(record)
    db_session.commit()

    listing_options = {"status": "active", "sort": "full_name", "direction": "asc", "limit": 10}
    result_page = list_employees_page(db_session, **listing_options)

    assert result_page["total"] == 1
    first_item = result_page["items"][0]
    assert first_item["full_name"] == "Alpha"
|
|
|
|
|
|
def test_stats_payload_uses_latest_run_new_count(db_session):
    """Verify the stats payload totals and the ``new_in_last_run`` figure.

    One active employee and one completed crawl run with ``new_count=3`` are
    committed. The payload must then report ``total == 1``, ``active == 1``
    and ``new_in_last_run == 3``.
    """
    sole_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    db_session.add(sole_employee)

    finished_run = CrawlRun(
        source_url="https://miem.hse.ru/persons",
        status="completed",
        new_count=3,
    )
    db_session.add(finished_run)
    db_session.commit()

    stats = stats_payload(db_session)

    assert stats["total"] == 1
    assert stats["active"] == 1
    assert stats["new_in_last_run"] == 3
|
|
|
|
|
|
def test_run_payload_calculates_progress():
    """Verify the processed count and progress percentage of a running crawl.

    The run has 10 found, 4 parsed and 1 errored entries; the payload is
    expected to report ``processed_count == 5`` and
    ``progress_percent == 50.0``. No database session is involved.
    """
    run_fields = {
        "source_url": "https://miem.hse.ru/persons",
        "status": "running",
        "found_count": 10,
        "parsed_count": 4,
        "error_count": 1,
    }
    in_flight_run = CrawlRun(**run_fields)

    summary = run_payload(in_flight_run)

    assert summary["processed_count"] == 5
    assert summary["progress_percent"] == 50.0
|