feat: add employee news links parsing and storage

This commit is contained in:
Anton
2026-05-22 18:50:25 +03:00
parent 680ac6e980
commit 4d2a071ec0
19 changed files with 636 additions and 16 deletions

View File

@@ -232,3 +232,45 @@ def test_news_heading_with_publications_word_does_not_absorb_widget_publications
assert len(publications) == 1
assert publications[0]["title"] == "Публикации и исследования"
assert publications[0]["publications_count"] == 1
def test_extract_sections_parses_employee_news_links():
    """Check that a ``press_links_news`` tab is parsed into one news section.

    The fixture holds two posts: the first carries a date block, a
    site-relative link and a summary paragraph; the second carries only an
    absolute link.  The test pins the section type, the number of links, and
    every field of the first link, including the expectation that its
    relative href comes back as an absolute URL on the page's host.
    """
    page_url = "https://www.hse.ru/staff/avsergeev"
    # Fixture lines sit flush-left inside the literal so that no indentation
    # whitespace becomes part of the parsed document.
    markup = """
<div class="b-person-data posts hidden printable" data-tab="press_links_news" tab-node="press_links_news">
<div class="post f8">
<div class="post__extra">
<div class="post-meta">
<div class="post-meta__date">
<div class="post-meta__day">28</div>
<div class="post-meta__month">апр.</div>
<div class="post-meta__year">2026</div>
</div>
</div>
</div>
<div class="post__content">
<h2 class="first_child"><a class="link" href="/news/edu/1153850518.html">Как финал ВсОШ формирует кадры</a></h2>
<div class="post__text"><p class="with-indent">Краткое описание новости.</p></div>
</div>
</div>
<div class="post f8">
<div class="post__content">
<h2><a href="https://miem.hse.ru/news/1123589375.html">Партнер магистратуры</a></h2>
</div>
</div>
</div>
"""
    soup = BeautifulSoup(markup, "html.parser")

    sections = extract_sections(soup, page_url)

    # The whole tab must collapse into exactly one section of type "news".
    assert len(sections) == 1
    news = sections[0]
    assert news["type"] == "news"
    # Both posts count, even though the second has no date or summary.
    assert news["news_count"] == 2

    first_link = news["news_links"][0]
    assert first_link["title"] == "Как финал ВсОШ формирует кадры"
    # The site-relative href is expected back as an absolute URL.
    assert first_link["url"] == "https://www.hse.ru/news/edu/1153850518.html"
    assert first_link["summary"] == "Краткое описание новости."
    # The "28" / "апр." / "2026" pieces are expected as midnight at +00:00.
    assert first_link["published_at"] == "2026-04-28T00:00:00+00:00"
    assert first_link["published_year"] == 2026