"""Tests for profile URL normalization, person-tab extraction and widget enrichment.

The network layer is replaced by small fake session classes that record the
calls they receive and answer with canned JSON payloads.
"""

from bs4 import BeautifulSoup

from app.parser.profile import enrich_sections_from_hse_widgets, extract_person_tabs
from app.parser.profile_url import normalize_profile_url, parse_profile_identity


class FakeResponse:
    """Stand-in for an HTTP response object that serves a fixed payload."""

    def __init__(self, payload):
        # The object handed back verbatim by ``json()``.
        self.payload = payload

    def raise_for_status(self):
        """Do nothing: the fake response never signals an HTTP error."""
        return None

    def json(self):
        """Return the payload given at construction time."""
        return self.payload


class FakeSession:
    """Fake session that records ``post``/``get`` calls and returns canned data.

    ``posts`` and ``gets`` collect ``(url, kwargs)`` tuples in call order so
    tests can assert on the requests that were issued.
    """

    def __init__(self):
        self.posts = []
        self.gets = []

    def post(self, url, **kwargs):
        """Record the call and answer with a flat one-item publications payload."""
        self.posts.append((url, kwargs))
        return FakeResponse(
            {
                "status": "ok",
                "result": {
                    "more": False,
                    "total": 1,
                    "items": [
                        {
                            "id": "888959076",
                            "type": "ARTICLE",
                            "title": "Дублирование пакетов",
                            "year": 2023,
                            "description": {"short": {"ru": "Информационные процессы. 2023."}},
                        }
                    ],
                },
            }
        )

    def get(self, url, **kwargs):
        """Record the call and answer with a one-item graduation-theses payload."""
        self.gets.append((url, kwargs))
        return FakeResponse(
            {
                "lang": "ru",
                "success": True,
                "data": [
                    {
                        "id": 1045750164,
                        "year": 2025,
                        "level": "Бакалавриат",
                        "title": "Аппаратно-программный комплекс защиты сети",
                        "rating": 8,
                        "student": "Лесняк Владислав Евгеньевич",
                        "learnProgram": {
                            "title": "Информатика и вычислительная техника",
                            "url": "https://hse.ru/ba/isct/",
                        },
                        "orgUnit": {
                            "title": "МИЭМ",
                            "url": "https://www.hse.ru/org/url/59315150",
                        },
                        "supervisors": [
                            {
                                "url": "https://www.hse.ru/org/persons/803294906",
                                "name": "Борисов Сергей Петрович",
                            }
                        ],
                    }
                ],
            }
        )


class GroupedPublicationsSession(FakeSession):
    """Fake session whose ``post`` returns publications grouped by year.

    Unlike the parent class, ``result["items"]`` is a nested mapping
    (``items -> "year" -> items -> "<year>" -> [publication, ...]``) rather
    than a flat list.
    """

    def post(self, url, **kwargs):
        """Record the call and answer with the grouped publications payload."""
        self.posts.append((url, kwargs))
        return FakeResponse(
            {
                "status": "ok",
                "result": {
                    "more": False,
                    "total": 1,
                    "groupType": 2,
                    "items": {
                        "year": {
                            "header": {"ru": "по году", "en": "by year"},
                            "criteria": {"year": []},
                            "items": {
                                "2011": [
                                    {
                                        "id": "146366790",
                                        "type": "ARTICLE",
                                        "title": "Развитие теории самосогласованного поля",
                                        "year": 2011,
                                        "description": {"short": {"ru": "Журнал физической химии 2011."}},
                                    }
                                ],
                                "2012": [
                                    {
                                        "id": "146367323",
                                        "type": "ARTICLE",
                                        "title": "Self-consistent field theory investigation",
                                        "year": 2012,
                                        "description": {
                                            "short": {"en": "Russian Journal of Physical Chemistry A 2012."}
                                        },
                                    }
                                ],
                            },
                        }
                    },
                },
            }
        )


def test_normalize_profile_url_supports_staff_and_org_persons():
    """Relative staff URLs and org/persons URLs normalize to canonical form."""
    assert normalize_profile_url("/staff/avsergeev#sci") == "https://www.hse.ru/staff/avsergeev"
    assert normalize_profile_url("https://www.hse.ru/org/persons/123/") == "https://www.hse.ru/org/persons/123"
    assert parse_profile_identity("https://www.hse.ru/staff/avsergeev") == ("staff", "avsergeev")


def test_extract_person_tabs_prefers_person_menu_addition():
    """Only the two expected tabs are returned, with absolute hrefs."""
    # NOTE(review): as seen here the fixture holds plain text with no tags or
    # hrefs, while the assertions below expect a "#sci" link — presumably the
    # HTML markup was stripped from this file; restore it from version control.
    soup = BeautifulSoup(
        """
Домашняя страница Публикации
Other person """,
        "html.parser",
    )

    tabs = extract_person_tabs(soup, "https://www.hse.ru/staff/avsergeev")

    assert [tab["title"] for tab in tabs] == ["Домашняя страница", "Публикации"]
    assert tabs[1]["href"] == "https://www.hse.ru/staff/avsergeev#sci"


def test_enrich_sections_from_hse_widgets_loads_publications_and_vkr():
    """Enrichment yields one publication and one thesis from the fake session."""
    # NOTE(review): the fixture is a single space as seen here, yet the test
    # expects requests to be issued — presumably the widget markup was lost;
    # confirm against the original file.
    soup = BeautifulSoup(
        """ """,
        "html.parser",
    )
    session = FakeSession()

    sections = enrich_sections_from_hse_widgets(
        session,
        soup,
        "https://www.hse.ru/org/persons/803294906",
        {"User-Agent": "test"},
        10,
        [],
    )

    publications = next(section for section in sections if section["type"] == "publications")
    theses = next(section for section in sections if section["type"] == "graduation_theses")

    assert publications["publications_count"] == 1
    assert publications["publications"][0]["url"] == "https://publications.hse.ru/view/888959076"
    assert theses["theses_count"] == 1
    assert theses["theses"][0]["student"] == "Лесняк Владислав Евгеньевич"
    assert theses["theses"][0]["project_url"] == "https://www.hse.ru/edu/vkr/1045750164"
    # The recorded calls show which endpoint and query parameters were used.
    assert session.posts[0][0] == "https://publications.hse.ru/api/searchPubs"
    assert session.gets[0][1]["params"] == {"supervisorId": "803294906"}


def test_enrich_sections_from_hse_widgets_loads_grouped_publications():
    """Year-grouped publication payloads are flattened into two publications."""
    # NOTE(review): the fixture is a single space as seen here — presumably
    # the widget markup was lost; confirm against the original file.
    soup = BeautifulSoup(
        """ """,
        "html.parser",
    )
    session = GroupedPublicationsSession()

    sections = enrich_sections_from_hse_widgets(
        session,
        soup,
        "https://www.hse.ru/org/persons/133709486",
        {"User-Agent": "test"},
        10,
        [],
    )

    publications = next(section for section in sections if section["type"] == "publications")

    assert publications["publications_count"] == 2
    assert [item["id"] for item in publications["publications"]] == ["146366790", "146367323"]
    assert publications["publications"][0]["url"] == "https://publications.hse.ru/view/146366790"
    assert publications["publications"][1]["url"] == "https://publications.hse.ru/view/146367323"