feat: add employee news links parsing and storage
This commit is contained in:
@@ -1,7 +1,16 @@
|
||||
import gzip
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, EmployeePublication, EmployeeSnapshot, ParseResourceCache
|
||||
from app.models import (
|
||||
CrawlError,
|
||||
CrawlRun,
|
||||
CrawlRunEmployeeChange,
|
||||
Employee,
|
||||
EmployeeNewsLink,
|
||||
EmployeePublication,
|
||||
EmployeeSnapshot,
|
||||
ParseResourceCache,
|
||||
)
|
||||
from app.services.crawler import _checksum, _mark_dismissed, _upsert_employee
|
||||
from app.services.resource_cache import ResourceCache
|
||||
|
||||
@@ -253,6 +262,44 @@ def test_upsert_employee_records_publication_errors_without_failing_employee(mon
|
||||
assert "публикации" in error.message.lower()
|
||||
|
||||
|
||||
def test_upsert_employee_saves_news_links_and_reuses_existing_rows(db_session):
    """Upsert the same news payload in two crawl runs and expect one link row.

    The employee returned by the first upsert is used to look up its
    ``EmployeeNewsLink`` rows after the second upsert has been committed.
    """
    persons_url = "https://miem.hse.ru/persons"
    first_run = CrawlRun(source_url=persons_url, status="running")
    second_run = CrawlRun(source_url=persons_url, status="running")
    db_session.add_all([first_run, second_run])
    db_session.commit()

    # First pass: keep the employee so its id can be used in the query below.
    first_payload = _parsed_employee_with_news("news-person")
    employee, _ = _upsert_employee(db_session, first_run, first_payload)
    db_session.commit()

    # Second pass: a freshly built but identical payload for the other run.
    second_payload = _parsed_employee_with_news("news-person")
    _upsert_employee(db_session, second_run, second_payload)
    db_session.commit()

    link_query = db_session.query(EmployeeNewsLink).filter_by(employee_id=employee.id)
    stored_links = link_query.all()

    assert len(stored_links) == 1
    only_link = stored_links[0]
    assert only_link.title == "News Title"
    assert only_link.url == "https://www.hse.ru/news/1.html"
    assert only_link.published_year == 2026
|
||||
|
||||
|
||||
def test_upsert_employee_records_news_errors_without_failing_employee(monkeypatch, db_session):
    """Check that a failing news sync is recorded without losing the employee.

    ``app.services.crawler._sync_employee_news_links`` is replaced with a
    function that always raises ``RuntimeError``; the upsert must still
    return the employee, and exactly one ``CrawlError`` must be stored.
    """
    crawl_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    db_session.add(crawl_run)
    db_session.commit()

    def _always_fails(*_args, **_kwargs):
        # Stand-in for the news sync step: fails regardless of arguments.
        raise RuntimeError("boom")

    monkeypatch.setattr("app.services.crawler._sync_employee_news_links", _always_fails)

    payload = _parsed_employee_with_news("news-error-safe")
    employee, changed = _upsert_employee(db_session, crawl_run, payload)
    db_session.commit()

    assert changed is True
    assert employee.full_name == "Same Person"

    # The employee row must be present despite the failing news sync.
    employee_query = db_session.query(Employee).filter_by(profile_key="staff:news-error-safe")
    assert employee_query.one()

    # Exactly one error row is expected; .one() raises otherwise.
    error = db_session.query(CrawlError).one()
    assert "новости" in error.message.lower()
|
||||
|
||||
|
||||
def test_checksum_changes_when_widget_data_changes():
|
||||
base = _parsed_employee("widgets")
|
||||
changed = _parsed_employee("widgets")
|
||||
@@ -314,3 +361,23 @@ def _parsed_employee_with_publication(profile_id: str) -> dict:
|
||||
}
|
||||
]
|
||||
return parsed
|
||||
|
||||
|
||||
def _parsed_employee_with_news(profile_id: str) -> dict:
    """Build a parsed-employee payload whose sections hold one news link.

    Starts from ``_parsed_employee(profile_id)`` and replaces its
    ``"sections"`` entry with a single section of type ``"news"`` containing
    one fixed news link.
    """
    payload = _parsed_employee(profile_id)

    news_title = "News Title"
    news_url = "https://www.hse.ru/news/1.html"
    news_link = {
        "title": news_title,
        "url": news_url,
        "summary": "News summary",
        "published_at": "2026-04-28T00:00:00+00:00",
        "published_year": 2026,
        "raw_data": {"title": news_title, "url": news_url},
    }
    news_section = {"type": "news", "news_links": [news_link]}

    payload["sections"] = [news_section]
    return payload
|
||||
|
||||
Reference in New Issue
Block a user