feat: add detailed employee publications storage and MCP docs
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import gzip
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import CrawlRun, CrawlRunEmployeeChange, Employee, EmployeeSnapshot, ParseResourceCache
|
||||
from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, EmployeePublication, EmployeeSnapshot, ParseResourceCache
|
||||
from app.services.crawler import _checksum, _mark_dismissed, _upsert_employee
|
||||
from app.services.resource_cache import ResourceCache
|
||||
|
||||
@@ -191,6 +191,68 @@ def test_upsert_employee_skips_snapshot_when_checksum_is_unchanged(db_session):
|
||||
assert db_session.query(EmployeeSnapshot).count() == 1
|
||||
|
||||
|
||||
def test_upsert_employee_saves_publications_and_reuses_existing_rows(db_session):
    """Upserting the same publication in two crawl runs must leave one stored row.

    The same employee payload (profile id ``"published"``) carrying a single
    publication is upserted once per crawl run. Afterwards exactly one
    ``EmployeePublication`` row must exist for the employee, and it must still
    carry the ``doi_url`` and ``authors`` values from the payload.
    """
    first_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    second_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    db_session.add_all([first_run, second_run])
    db_session.commit()

    # Both upserts take their payload from the shared fixture helper so the two
    # runs are guaranteed to describe the same publication; a fresh dict is
    # built per call so neither upsert can observe mutations made by the other.
    employee, _ = _upsert_employee(db_session, first_run, _parsed_employee_with_publication("published"))
    db_session.commit()
    _upsert_employee(db_session, second_run, _parsed_employee_with_publication("published"))
    db_session.commit()

    publications = db_session.query(EmployeePublication).filter_by(employee_id=employee.id).all()
    assert len(publications) == 1
    assert publications[0].doi_url == "https://doi.org/10.1/test"
    assert publications[0].authors == [{"id": "1", "title_ru": "Автор"}]
|
||||
|
||||
|
||||
def test_upsert_employee_records_publication_errors_without_failing_employee(monkeypatch, db_session):
    """A failing publication sync must not prevent the employee from being saved.

    ``app.services.crawler._sync_employee_publications`` is replaced with a stub
    that always raises. The upsert is still expected to report a change, persist
    the employee, and leave exactly one ``CrawlError`` whose message mentions
    publications ("публикации").
    """
    crawl_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    db_session.add(crawl_run)
    db_session.commit()

    def _always_fails(*_args, **_kwargs):
        # Stand-in for the publication sync step: fails regardless of arguments.
        raise RuntimeError("boom")

    monkeypatch.setattr("app.services.crawler._sync_employee_publications", _always_fails)

    payload = _parsed_employee_with_publication("error-safe")
    employee, changed = _upsert_employee(db_session, crawl_run, payload)
    db_session.commit()

    assert changed is True
    assert employee.full_name == "Same Person"

    # .one() raises unless exactly one matching row exists.
    stored_employee = db_session.query(Employee).filter_by(profile_key="staff:error-safe").one()
    assert stored_employee

    recorded_error = db_session.query(CrawlError).one()
    assert "публикации" in recorded_error.message.lower()
|
||||
|
||||
|
||||
def test_checksum_changes_when_widget_data_changes():
|
||||
base = _parsed_employee("widgets")
|
||||
changed = _parsed_employee("widgets")
|
||||
@@ -224,3 +286,31 @@ def _parsed_employee(profile_id: str) -> dict:
|
||||
"parser_version": "0.6.0",
|
||||
"_html": "<html></html>",
|
||||
}
|
||||
|
||||
|
||||
def _parsed_employee_with_publication(profile_id: str) -> dict:
    """Build a parsed-employee payload whose only section is one publication.

    Starts from ``_parsed_employee(profile_id)`` and sets its ``"sections"``
    entry to a single ``"publications"`` section containing one fixed,
    fully populated publication record.
    """
    publication = {
        "id": "888959076",
        "publication_id": "888959076",
        "title": "Detailed Publication",
        "year": 2023,
        "publication_type": "ARTICLE",
        "language": "ru",
        "status": 1,
        "url": "https://publications.hse.ru/view/888959076",
        "doi_url": "https://doi.org/10.1/test",
        "citation_text": "Detailed citation",
        "annotation": {"ru": "Аннотация"},
        "description": {"main": "Detailed citation"},
        "authors": [{"id": "1", "title_ru": "Автор"}],
        "raw_data": {"id": "888959076", "title": "Detailed Publication"},
    }
    publications_section = {"type": "publications", "publications": [publication]}

    payload = _parsed_employee(profile_id)
    payload["sections"] = [publications_section]
    return payload
|
||||
|
||||
Reference in New Issue
Block a user