"""Tests for the crawler helpers ``_mark_dismissed`` and ``_upsert_employee``."""

from datetime import datetime, timezone

from app.models import CrawlRun, Employee
from app.services.crawler import _mark_dismissed, _upsert_employee


def test_mark_dismissed_only_marks_missing_active_employees(db_session):
    """Check that ``_mark_dismissed`` only touches the employee not in the set.

    Two active employees are stored; only ``"staff:kept"`` is passed to
    ``_mark_dismissed``. The call must report one dismissal, leave the kept
    row active, and flag the other row as dismissed with ``dismissed_at`` set.
    """
    seeded_profiles = (
        ("staff:kept", "https://www.hse.ru/staff/kept"),
        ("staff:gone", "https://www.hse.ru/staff/gone"),
    )
    for key, url in seeded_profiles:
        db_session.add(
            Employee(
                profile_key=key,
                canonical_url=url,
                status="active",
                first_seen_at=datetime.now(timezone.utc),
                last_seen_at=datetime.now(timezone.utc),
            )
        )
    db_session.commit()

    dismissed = _mark_dismissed(db_session, {"staff:kept"})

    assert dismissed == 1

    kept = db_session.query(Employee).filter_by(profile_key="staff:kept").one()
    assert kept.status == "active"

    gone = db_session.query(Employee).filter_by(profile_key="staff:gone").one()
    assert gone.status == "dismissed"
    assert gone.dismissed_at is not None


def test_upsert_employee_increments_new_count_for_new_employee(db_session):
    """Check that upserting one profile payload leaves ``run.new_count`` at 1.

    NOTE(review): presumably the ``db_session`` fixture starts with no
    matching employee, so the payload counts as new -- confirm against the
    fixture definition.
    """
    run = CrawlRun(
        source_url="https://miem.hse.ru/persons",
        status="running",
    )
    db_session.add(run)
    db_session.commit()

    profile_payload = {
        "source_url": "https://www.hse.ru/staff/newperson",
        "profile_type": "staff",
        "profile_id": "newperson",
        "full_name": "New Person",
        "tabs": [],
        "sections": [],
        "parser_version": "0.2.0",
        "_html": "",
    }
    _upsert_employee(db_session, run, profile_payload)
    db_session.commit()

    assert run.new_count == 1