Files
miem_workers/tests/test_crawler.py

59 lines
1.8 KiB
Python

from datetime import datetime, timezone
from app.models import CrawlRun, Employee
from app.services.crawler import _mark_dismissed, _upsert_employee
def test_mark_dismissed_only_marks_missing_active_employees(db_session):
    """Verify ``_mark_dismissed`` dismisses only the employee left out of the key set.

    Two employees with status ``"active"`` are stored, then ``_mark_dismissed``
    is called with a set containing only ``"staff:kept"``. The call must
    return 1, leave ``staff:kept`` active, and set ``staff:gone`` to
    ``"dismissed"`` with a non-null ``dismissed_at``.
    """
    # Arrange: two active employees, built in the same order they are stored.
    kept_employee = Employee(
        profile_key="staff:kept",
        canonical_url="https://www.hse.ru/staff/kept",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    gone_employee = Employee(
        profile_key="staff:gone",
        canonical_url="https://www.hse.ru/staff/gone",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    for employee in (kept_employee, gone_employee):
        db_session.add(employee)
    db_session.commit()

    # Act: only "staff:kept" is passed in the key set.
    dismissed_count = _mark_dismissed(db_session, {"staff:kept"})

    def fetch_by_key(profile_key):
        # Re-read the row through the session; .one() fails unless exactly one row matches.
        return db_session.query(Employee).filter_by(profile_key=profile_key).one()

    # Assert: exactly one dismissal, and it is the employee missing from the set.
    assert dismissed_count == 1
    assert fetch_by_key("staff:kept").status == "active"
    gone_row = fetch_by_key("staff:gone")
    assert gone_row.status == "dismissed"
    assert gone_row.dismissed_at is not None
def test_upsert_employee_increments_new_count_for_new_employee(db_session):
    """Verify ``_upsert_employee`` leaves the crawl run with ``new_count == 1``.

    A ``CrawlRun`` with status ``"running"`` is stored, then a single profile
    payload is upserted against it. This test adds no ``Employee`` row
    beforehand, so the profile is expected to count as new.
    """
    # Arrange: a persisted crawl run to attribute the upsert to.
    crawl_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    db_session.add(crawl_run)
    db_session.commit()

    profile_payload = {
        "source_url": "https://www.hse.ru/staff/newperson",
        "profile_type": "staff",
        "profile_id": "newperson",
        "full_name": "New Person",
        "tabs": [],
        "sections": [],
        "parser_version": "0.2.0",
        "_html": "<html></html>",
    }

    # Act: upsert the profile and persist the result.
    _upsert_employee(db_session, crawl_run, profile_payload)
    db_session.commit()

    # Assert: the run recorded exactly one new employee.
    assert crawl_run.new_count == 1