Compare commits
2 Commits
fix/remove
...
fix/groupe
| Author | SHA1 | Date | |
|---|---|---|---|
| | efa7192e45 | | |
| | b27d613143 | | |
@@ -263,10 +263,10 @@ def _load_widget_publications(session: Session, soup: BeautifulSoup, headers: di
|
||||
return publications
|
||||
|
||||
result = data.get("result") if isinstance(data, dict) else {}
|
||||
items = result.get("items") if isinstance(result, dict) else []
|
||||
if not isinstance(items, list) or not items:
|
||||
items = _extract_publication_items(result)
|
||||
if not items:
|
||||
break
|
||||
publications.extend(_normalize_publication_item(item) for item in items if isinstance(item, dict))
|
||||
publications.extend(_normalize_publication_item(item) for item in items)
|
||||
|
||||
total = int(result.get("total") or 0)
|
||||
if not result.get("more") and len(publications) >= total:
|
||||
@@ -275,6 +275,34 @@ def _load_widget_publications(session: Session, soup: BeautifulSoup, headers: di
|
||||
return _dedupe_publications(publications)
|
||||
|
||||
|
||||
def _extract_publication_items(result: object) -> list[dict]:
    """Return the publication dicts found under ``result["items"]``.

    Args:
        result: The ``"result"`` payload of a publications response. Only a
            dict is inspected; any other type yields an empty list.

    Returns:
        The publication items located by ``_flatten_publication_items`` in
        ``result["items"]``, which may be a flat list or a nested grouping.
    """
    if not isinstance(result, dict):
        return []
    return _flatten_publication_items(result.get("items"))
|
||||
|
||||
|
||||
def _flatten_publication_items(value: object) -> list[dict]:
    """Collect publication dicts from a flat or grouped ``items`` payload.

    Args:
        value: A list of candidate items, a dict describing one or more
            groups, or anything else (which is ignored).

    Returns:
        Every element accepted by ``_is_publication_item``, in encounter
        order. A list is filtered directly and is not descended into. A dict
        is followed through its ``"items"`` key when that key holds a list or
        a dict; otherwise each of its values is searched recursively.
    """
    if isinstance(value, list):
        return [item for item in value if _is_publication_item(item)]
    if not isinstance(value, dict):
        return []

    nested_items = value.get("items")
    # A group's own "items" entry is authoritative: sibling keys such as
    # headers or criteria are not searched once it is a list or a dict.
    if isinstance(nested_items, (list, dict)):
        return _flatten_publication_items(nested_items)

    # No usable "items" key: treat every value as a potential sub-group
    # (for example a mapping of group key -> list of publications).
    publications: list[dict] = []
    for child in value.values():
        publications.extend(_flatten_publication_items(child))
    return publications
|
||||
|
||||
|
||||
def _is_publication_item(value: object) -> bool:
|
||||
return isinstance(value, dict) and ("id" in value or "title" in value)
|
||||
|
||||
|
||||
def _load_widget_graduation_theses(
|
||||
session: Session,
|
||||
soup: BeautifulSoup,
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
APP_VERSION = "0.4.5"
|
||||
FRONTEND_VERSION = "0.4.5"
|
||||
BACKEND_VERSION = "0.4.5"
|
||||
APP_VERSION = "0.4.6"
|
||||
FRONTEND_VERSION = "0.4.6"
|
||||
BACKEND_VERSION = "0.4.6"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "miem-workers"
|
||||
version = "0.4.5"
|
||||
version = "0.4.6"
|
||||
description = "MIEM employees parser, admin API, and MCP server"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
||||
@@ -18,7 +18,7 @@ def test_health_returns_versions():
|
||||
response = client.get("/api/health")
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.json()["backend_version"] == "0.4.5"
|
||||
assert response.json()["backend_version"] == "0.4.6"
|
||||
|
||||
|
||||
def test_mcp_lists_tools_without_auth_and_ignores_auth_header():
|
||||
|
||||
@@ -64,6 +64,47 @@ class FakeSession:
|
||||
)
|
||||
|
||||
|
||||
class GroupedPublicationsSession(FakeSession):
    """Fake session whose POST response nests publications inside groups.

    The payload's ``result["items"]`` is a dict (group name -> group), and the
    group's own ``"items"`` is a dict mapping a year string to a list of
    publication dicts, instead of a flat list of publications.
    """

    def post(self, url, **kwargs):
        """Record the call in ``self.posts`` and return the grouped payload."""
        self.posts.append((url, kwargs))
        return FakeResponse(
            {
                "status": "ok",
                "result": {
                    "more": False,
                    # NOTE(review): "total" is 1 although two publications
                    # are listed below -- presumably mirrors the upstream
                    # response shape; confirm.
                    "total": 1,
                    "groupType": 2,
                    "items": {
                        "year": {
                            "header": {"ru": "по году", "en": "by year"},
                            "criteria": {"year": []},
                            "items": {
                                "2011": [
                                    {
                                        "id": "146366790",
                                        "type": "ARTICLE",
                                        "title": "Развитие теории самосогласованного поля",
                                        "year": 2011,
                                        "description": {"short": {"ru": "Журнал физической химии 2011."}},
                                    }
                                ],
                                "2012": [
                                    {
                                        "id": "146367323",
                                        "type": "ARTICLE",
                                        "title": "Self-consistent field theory investigation",
                                        "year": 2012,
                                        "description": {"short": {"en": "Russian Journal of Physical Chemistry A 2012."}},
                                    }
                                ],
                            },
                        }
                    },
                },
            }
        )
|
||||
|
||||
|
||||
def test_normalize_profile_url_supports_staff_and_org_persons():
|
||||
assert normalize_profile_url("/staff/avsergeev#sci") == "https://www.hse.ru/staff/avsergeev"
|
||||
assert normalize_profile_url("https://www.hse.ru/org/persons/123/") == "https://www.hse.ru/org/persons/123"
|
||||
@@ -117,3 +158,29 @@ def test_enrich_sections_from_hse_widgets_loads_publications_and_vkr():
|
||||
assert theses["theses"][0]["project_url"] == "https://www.hse.ru/edu/vkr/1045750164"
|
||||
assert session.posts[0][0] == "https://publications.hse.ru/api/searchPubs"
|
||||
assert session.gets[0][1]["params"] == {"supervisorId": "803294906"}
|
||||
|
||||
|
||||
def test_enrich_sections_from_hse_widgets_loads_grouped_publications():
    """Grouped publication payloads are flattened into the publications section."""
    soup = BeautifulSoup(
        """
        <script src="/n/stat/publications/dist-w/publs.js" data-author="133709486" data-widget-name="AuthorSearch"></script>
        """,
        "html.parser",
    )
    session = GroupedPublicationsSession()

    sections = enrich_sections_from_hse_widgets(
        session,
        soup,
        "https://www.hse.ru/org/persons/133709486",
        {"User-Agent": "test"},
        10,
        [],
    )

    publications = next(section for section in sections if section["type"] == "publications")

    # Both year groups ("2011" and "2012") contribute one publication each.
    assert publications["publications_count"] == 2
    assert [item["id"] for item in publications["publications"]] == ["146366790", "146367323"]
    assert publications["publications"][0]["url"] == "https://publications.hse.ru/view/146366790"
    assert publications["publications"][1]["url"] == "https://publications.hse.ru/view/146367323"
|
||||
|
||||
Reference in New Issue
Block a user