Files
miem_workers/tests/test_parser.py

29 lines
1.2 KiB
Python

from bs4 import BeautifulSoup
from app.parser.profile import extract_person_tabs
from app.parser.profile_url import normalize_profile_url, parse_profile_identity
def test_normalize_profile_url_supports_staff_and_org_persons():
    """Verify profile URL normalization and identity parsing.

    Covers a relative ``/staff/`` path carrying a fragment and an absolute
    ``/org/persons/`` URL with a trailing slash, then checks that a staff
    URL is parsed into a ``(kind, identifier)`` pair.
    """
    # Each entry pairs a raw input with the canonical URL expected back.
    normalization_cases = (
        ("/staff/avsergeev#sci", "https://www.hse.ru/staff/avsergeev"),
        ("https://www.hse.ru/org/persons/123/", "https://www.hse.ru/org/persons/123"),
    )
    for raw_url, canonical_url in normalization_cases:
        assert normalize_profile_url(raw_url) == canonical_url

    identity = parse_profile_identity("https://www.hse.ru/staff/avsergeev")
    assert identity == ("staff", "avsergeev")
def test_extract_person_tabs_prefers_person_menu_addition():
    """Verify that tabs are taken from the ``person-menu-addition`` block.

    The fixture holds a person menu with two fragment links plus an
    unrelated link outside the menu. The test expects only the two menu
    entries to come back, in document order, and expects the second
    entry's fragment href to be resolved against the profile URL.
    """
    # The literal is deliberately left unindented so the parsed markup is
    # exactly the text of the fixture.
    markup = """
<div class="person-menu is-desktop small person-menu-addition">
<a href="#main">Домашняя страница</a>
<a href="#sci" data-index="1">Публикации</a>
</div>
<a href="/org/persons/999">Other person</a>
"""
    page = BeautifulSoup(markup, "html.parser")
    profile_url = "https://www.hse.ru/staff/avsergeev"

    tabs = extract_person_tabs(page, profile_url)

    # Check the titles first; the link outside the menu must not appear.
    titles = []
    for tab in tabs:
        titles.append(tab["title"])
    assert titles == ["Домашняя страница", "Публикации"]

    publications_tab = tabs[1]
    assert publications_tab["href"] == "https://www.hse.ru/staff/avsergeev#sci"