fix: separate news from publications and add employee refresh
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.parser.profile import enrich_sections_from_hse_widgets, extract_person_tabs
|
||||
from app.parser.profile import enrich_sections_from_hse_widgets, extract_person_tabs, extract_sections
|
||||
from app.parser.profile_url import normalize_profile_url, parse_profile_identity
|
||||
|
||||
|
||||
@@ -184,3 +184,34 @@ def test_enrich_sections_from_hse_widgets_loads_grouped_publications():
|
||||
assert [item["id"] for item in publications["publications"]] == ["146366790", "146367323"]
|
||||
assert publications["publications"][0]["url"] == "https://publications.hse.ru/view/146366790"
|
||||
assert publications["publications"][1]["url"] == "https://publications.hse.ru/view/146367323"
|
||||
|
||||
|
||||
def test_news_heading_with_publications_word_does_not_absorb_widget_publications():
|
||||
soup = BeautifulSoup(
|
||||
"""
|
||||
<h2>Статья профессора МИЭМ вошла в число самых популярных публикаций на портале SpringerLink</h2>
|
||||
<div class="post__text">
|
||||
<p>Первоначально статья профессора вышла в российском журнале.</p>
|
||||
</div>
|
||||
<script src="/n/stat/publications/dist-w/publs.js" data-author="133709486" data-widget-name="AuthorSearch"></script>
|
||||
""",
|
||||
"html.parser",
|
||||
)
|
||||
session = FakeSession()
|
||||
|
||||
sections = extract_sections(soup, "https://www.hse.ru/org/persons/133709486")
|
||||
sections = enrich_sections_from_hse_widgets(
|
||||
session,
|
||||
soup,
|
||||
"https://www.hse.ru/org/persons/133709486",
|
||||
{"User-Agent": "test"},
|
||||
10,
|
||||
sections,
|
||||
)
|
||||
|
||||
assert sections[0]["type"] == "paragraphs"
|
||||
assert sections[0]["title"].startswith("Статья профессора")
|
||||
publications = [section for section in sections if section["type"] == "publications"]
|
||||
assert len(publications) == 1
|
||||
assert publications[0]["title"] == "Публикации и исследования"
|
||||
assert publications[0]["publications_count"] == 1
|
||||
|
||||
Reference in New Issue
Block a user