Compare commits

...

7 Commits

10 changed files with 638 additions and 34 deletions

View File

@@ -110,4 +110,4 @@ docker compose exec postgres pg_dump -U miem miem_workers > backup.sql
docker compose down
```
Версия сервиса: `0.2.0`. Админка всегда показывает версии backend и frontend в footer.
Версия сервиса: `0.2.3`. Админка всегда показывает версии backend и frontend в footer.

View File

@@ -8,7 +8,7 @@ from app.config import Settings, get_settings
from app.db import SessionLocal, get_db
from app.models import CrawlError, CrawlRun, Employee
from app.security import SESSION_COOKIE, require_admin, sign_session, verify_admin
from app.services.admin_data import list_employees_page, run_payload, stats_payload
from app.services.admin_data import employee_detail_payload, list_employees_page, run_payload, stats_payload
from app.services.crawl_control import get_running_run, run_crawl_if_idle
from app.version import BACKEND_VERSION, FRONTEND_VERSION
@@ -134,7 +134,11 @@ def employee_detail(
if not employee:
return RedirectResponse("/admin/employees", status_code=303)
snapshots = sorted(employee.snapshots, key=lambda item: item.captured_at, reverse=True)[:20]
return _render(request, "employee_detail.html", {"employee": employee, "snapshots": snapshots})
return _render(
request,
"employee_detail.html",
{"employee": employee, "employee_view": employee_detail_payload(employee), "snapshots": snapshots},
)
@router.get("/runs", response_class=HTMLResponse)
@@ -190,7 +194,7 @@ def _render(request: Request, template: str, context: dict, status_code: int = 2
"frontend_version": FRONTEND_VERSION,
**context,
}
return templates.TemplateResponse(template, payload, status_code=status_code)
return templates.TemplateResponse(request, template, payload, status_code=status_code)
def _parse_date(value: str | None):

View File

@@ -20,18 +20,19 @@ EMPLOYEE_SORTS = {
def employee_display_payload(employee: Employee) -> dict[str, Any]:
data = employee.current_data or {}
contacts = data.get("contacts") or {}
sections = data.get("sections") or []
emails = contacts.get("emails") or []
phones = contacts.get("phones") or []
data = _as_dict(employee.current_data)
contacts = _as_dict(data.get("contacts"))
sections = _as_list(data.get("sections"))
positions = _clean_list(data.get("positions"))
emails = _clean_list(contacts.get("emails"))
phones = _clean_list(contacts.get("phones"))
return {
"id": employee.id,
"full_name": employee.full_name,
"status": employee.status,
"canonical_url": employee.canonical_url,
"positions": data.get("positions") or [],
"positions_text": "; ".join(data.get("positions") or []),
"positions": positions,
"positions_text": "; ".join(positions),
"hse_start_year": data.get("hse_start_year"),
"emails": emails,
"email_text": ", ".join(emails),
@@ -46,6 +47,25 @@ def employee_display_payload(employee: Employee) -> dict[str, Any]:
}
def employee_detail_payload(employee: Employee) -> dict[str, Any]:
    """Build the view model used by the employee detail page.

    The result starts from ``employee_display_payload(employee)`` and adds
    profile metadata (the model attribute wins, ``current_data`` is the
    fallback), a structured ``contacts`` mapping, normalized external IDs and
    normalized profile sections. Keys set here override same-named keys from
    the display payload.
    """
    current = _as_dict(employee.current_data)
    contact_data = _as_dict(current.get("contacts"))

    payload = dict(employee_display_payload(employee))
    payload["profile_type"] = employee.profile_type or current.get("profile_type")
    payload["profile_id"] = employee.profile_id or current.get("profile_id")
    payload["parser_version"] = employee.parser_version or current.get("parser_version")
    payload["contacts"] = {
        "emails": _clean_list(contact_data.get("emails")),
        "phones": _clean_list(contact_data.get("phones")),
        "address": contact_data.get("address"),
        "items": _normalize_contact_items(contact_data.get("items")),
    }
    payload["external_ids"] = _normalize_external_ids(current.get("external_ids"))
    payload["sections"] = [
        _normalize_section(raw_section)
        for raw_section in _as_list(current.get("sections"))
    ]
    return payload
def build_employee_query(
*,
status: str | None = None,
@@ -157,3 +177,156 @@ def _count_section_items(sections: list[dict[str, Any]], section_type: str) -> i
elif section_type == "courses_by_year":
total += len(section.get("courses") or [])
return total
def _clean_list(values: Any) -> list[str]:
if values is None:
return []
if not isinstance(values, list):
values = [values]
return [str(value).strip() for value in values if str(value or "").strip()]
def _as_dict(value: Any) -> dict[str, Any]:
return value if isinstance(value, dict) else {}
def _as_list(value: Any) -> list[Any]:
if value is None:
return []
return value if isinstance(value, list) else [value]
def _normalize_contact_items(items: Any) -> list[str]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if isinstance(item, dict):
value = item.get("raw") or item.get("value") or item.get("text")
else:
value = item
value = str(value or "").strip()
if value:
normalized.append(value)
return normalized
def _normalize_external_ids(items: Any) -> list[dict[str, str | None]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
continue
system = str(item.get("system") or "").strip()
value = str(item.get("value") or "").strip()
url = str(item.get("url") or "").strip()
if system or value or url:
normalized.append({"system": system or "ID", "value": value or url, "url": url or None})
return normalized
def _normalize_section(section: Any) -> dict[str, Any]:
if not isinstance(section, dict):
return {"title": "Раздел", "type": "generic", "paragraphs": [str(section)], "items": [], "links": []}
section_type = section.get("type") or "generic"
paragraphs = _clean_list(section.get("paragraphs"))
items = _clean_list(section.get("items"))
raw_text = str(section.get("raw_text") or "").strip()
if not paragraphs and not items and raw_text:
paragraphs = [raw_text]
return {
"title": section.get("title") or "Раздел",
"type": section_type,
"raw_text": raw_text,
"paragraphs": paragraphs,
"items": items,
"links": _normalize_links(section.get("links")),
"year_entries": _normalize_year_entries(section.get("year_entries")),
"publications": _normalize_publications(section.get("publications")),
"publications_count": section.get("publications_count"),
"academic_year": section.get("academic_year"),
"courses": _normalize_courses(section.get("courses")),
"table": _normalize_table(section.get("table")),
}
def _normalize_links(items: Any) -> list[dict[str, str | None]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
continue
text = str(item.get("text") or item.get("url") or "").strip()
url = str(item.get("url") or "").strip()
if text and url:
normalized.append({"text": text, "url": url})
return normalized
def _normalize_year_entries(items: Any) -> list[dict[str, Any]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
continue
text = str(item.get("text") or "").strip()
if text:
normalized.append({"year": item.get("year"), "text": text, "links": _normalize_links(item.get("links"))})
return normalized
def _normalize_publications(items: Any) -> list[dict[str, str | None]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
text = str(item or "").strip()
if text:
normalized.append({"title": text, "text": text, "url": None})
continue
title = str(item.get("title") or "").strip()
text = str(item.get("text") or title).strip()
url = str(item.get("url") or "").strip()
if title or text:
normalized.append({"title": title or text, "text": text or title, "url": url or None})
return normalized
def _normalize_courses(items: Any) -> list[dict[str, str | None]]:
normalized = []
if not isinstance(items, list):
return normalized
for item in items:
if not isinstance(item, dict):
title = str(item or "").strip()
if title:
normalized.append({"title": title, "url": None})
continue
title = str(item.get("title") or "").strip()
url = str(item.get("url") or "").strip()
if title or url:
normalized.append({"title": title or url, "url": url or None})
return normalized
def _normalize_table(table: Any) -> dict[str, Any] | None:
if not isinstance(table, dict):
return None
headers = _clean_list(table.get("headers"))
rows = []
for row in table.get("rows") or []:
if not isinstance(row, dict):
continue
cells = _clean_list(row.get("cells"))
if cells:
rows.append({"cells": cells, "link_url": row.get("link_url")})
if not headers and not rows:
return None
return {"headers": headers, "rows": rows}

View File

@@ -152,6 +152,165 @@
white-space: pre-wrap;
}
.employee-card {
display: grid;
gap: 18px;
}
.employee-card__header {
display: flex;
align-items: flex-start;
justify-content: space-between;
gap: 18px;
padding: 22px;
background: #ffffff;
border: 1px solid #d9dee7;
border-radius: 8px;
}
.employee-card__identity {
display: grid;
gap: 10px;
}
.employee-card__title {
margin: 0;
font-size: 24px;
}
.employee-card__section {
padding: 20px;
background: #ffffff;
border: 1px solid #d9dee7;
border-radius: 8px;
}
.employee-card__meta {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
gap: 14px;
margin: 0;
}
.employee-card__meta-item {
min-width: 0;
}
.employee-card__meta-item--wide {
grid-column: 1 / -1;
}
.employee-card__meta-label {
margin-bottom: 5px;
color: #6b7280;
font-size: 12px;
font-weight: 700;
text-transform: uppercase;
}
.employee-card__meta-value {
margin: 0;
color: #1f2937;
line-height: 1.45;
}
.employee-card__list {
display: grid;
gap: 8px;
margin: 0;
padding-left: 18px;
}
.employee-card__list-item {
line-height: 1.45;
}
.employee-card__sections {
display: grid;
gap: 14px;
}
.employee-section {
padding: 16px;
background: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 8px;
}
.employee-section__header {
display: flex;
align-items: flex-start;
justify-content: space-between;
gap: 12px;
margin-bottom: 12px;
}
.employee-section__title {
margin: 0;
font-size: 17px;
}
.employee-section__type {
flex: 0 0 auto;
padding: 3px 8px;
color: #475569;
background: #e2e8f0;
border-radius: 999px;
font-size: 12px;
}
.employee-section__note {
margin: 0 0 10px;
color: #4b5563;
font-weight: 700;
}
.employee-section__text {
margin: 0 0 10px;
line-height: 1.55;
}
.employee-section__table-wrap {
overflow-x: auto;
}
.employee-section__table {
width: 100%;
border-collapse: collapse;
background: #ffffff;
}
.employee-section__head,
.employee-section__cell {
padding: 10px;
border-bottom: 1px solid #e5e7eb;
text-align: left;
vertical-align: top;
}
.employee-section__head {
color: #374151;
background: #f3f4f6;
font-size: 13px;
}
.employee-section__links {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 12px;
}
.employee-section__link {
padding: 5px 9px;
color: #0f766e;
background: #ccfbf1;
border-radius: 999px;
font-size: 12px;
font-weight: 700;
text-decoration: none;
}
.stats-strip {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
@@ -399,7 +558,8 @@
}
.progress-panel__header,
.directory__header {
.directory__header,
.employee-card__header {
align-items: stretch;
flex-direction: column;
}

View File

@@ -1,19 +1,192 @@
{% extends "base.html" %}
{% block title %}{{ employee.full_name }} · MIEM Employees{% endblock %}
{% block title %}{{ employee_view.full_name }} · MIEM Employees{% endblock %}
{% block content %}
<section class="panel">
<h2 class="panel__title">{{ employee.full_name or employee.profile_key }}</h2>
<p><span class="badge {% if employee.status == "dismissed" %}badge--dismissed{% endif %}">{{ employee.status }}</span></p>
<p><a class="admin__link" href="{{ employee.canonical_url }}">{{ employee.canonical_url }}</a></p>
<h3>Tabs</h3>
<ul>
{% for tab in employee.tabs %}
<li><a class="admin__link" href="{{ tab.href }}">{{ tab.title }}</a></li>
{% endfor %}
</ul>
<h3>Current data</h3>
<pre class="code">{{ employee.current_data | tojson(indent=2) }}</pre>
<section class="employee-card">
<div class="employee-card__header">
<div class="employee-card__identity">
<h2 class="employee-card__title">{{ employee_view.full_name or employee.profile_key }}</h2>
<span class="badge {% if employee_view.status == "dismissed" %}badge--dismissed{% endif %}">{{ employee_view.status }}</span>
</div>
<a class="admin__link" href="{{ employee_view.canonical_url }}">{{ employee_view.canonical_url }}</a>
</div>
<section class="employee-card__section">
<h3 class="employee-section__title">Основная информация</h3>
<dl class="employee-card__meta">
<div class="employee-card__meta-item">
<dt class="employee-card__meta-label">Должности</dt>
<dd class="employee-card__meta-value">
{% if employee_view.positions %}
<ul class="employee-card__list">
{% for position in employee_view.positions %}
<li class="employee-card__list-item">{{ position }}</li>
{% endfor %}
</ul>
{% else %}
Не указано
{% endif %}
</dd>
</div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Год начала работы в ВШЭ</dt><dd class="employee-card__meta-value">{{ employee_view.hse_start_year or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Profile type</dt><dd class="employee-card__meta-value">{{ employee_view.profile_type or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Profile ID</dt><dd class="employee-card__meta-value">{{ employee_view.profile_id or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">First seen</dt><dd class="employee-card__meta-value">{{ employee_view.first_seen_at or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Last seen</dt><dd class="employee-card__meta-value">{{ employee_view.last_seen_at or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Dismissed at</dt><dd class="employee-card__meta-value">{{ employee_view.dismissed_at or "Не указано" }}</dd></div>
<div class="employee-card__meta-item"><dt class="employee-card__meta-label">Parser version</dt><dd class="employee-card__meta-value">{{ employee_view.parser_version or "Не указано" }}</dd></div>
</dl>
</section>
<section class="employee-card__section">
<h3 class="employee-section__title">Контакты</h3>
<dl class="employee-card__meta">
<div class="employee-card__meta-item">
<dt class="employee-card__meta-label">Email</dt>
<dd class="employee-card__meta-value">
{% if employee_view.contacts.emails %}
<ul class="employee-card__list">
{% for email in employee_view.contacts.emails %}
<li class="employee-card__list-item"><a class="admin__link" href="mailto:{{ email }}">{{ email }}</a></li>
{% endfor %}
</ul>
{% else %}
Не указано
{% endif %}
</dd>
</div>
<div class="employee-card__meta-item">
<dt class="employee-card__meta-label">Телефоны</dt>
<dd class="employee-card__meta-value">{{ employee_view.contacts.phones | join(", ") if employee_view.contacts.phones else "Не указано" }}</dd>
</div>
<div class="employee-card__meta-item">
<dt class="employee-card__meta-label">Адрес</dt>
<dd class="employee-card__meta-value">{{ employee_view.contacts.address or "Не указано" }}</dd>
</div>
{% if employee_view.contacts.items %}
<div class="employee-card__meta-item employee-card__meta-item--wide">
<dt class="employee-card__meta-label">Прочее</dt>
<dd class="employee-card__meta-value">
<ul class="employee-card__list">
{% for item in employee_view.contacts.items %}
<li class="employee-card__list-item">{{ item }}</li>
{% endfor %}
</ul>
</dd>
</div>
{% endif %}
</dl>
</section>
{% if employee_view.external_ids %}
<section class="employee-card__section">
<h3 class="employee-section__title">Внешние идентификаторы</h3>
<ul class="employee-card__list">
{% for external_id in employee_view.external_ids %}
<li class="employee-card__list-item">
<strong>{{ external_id.system }}:</strong>
{% if external_id.url %}
<a class="admin__link" href="{{ external_id.url }}">{{ external_id.value }}</a>
{% else %}
{{ external_id.value }}
{% endif %}
</li>
{% endfor %}
</ul>
</section>
{% endif %}
<section class="employee-card__section">
<h3 class="employee-section__title">Разделы профиля</h3>
{% if employee_view.sections %}
<div class="employee-card__sections">
{% for section in employee_view.sections %}
<article class="employee-section">
<div class="employee-section__header">
<h4 class="employee-section__title">{{ section.title }}</h4>
<span class="employee-section__type">{{ section.type }}</span>
</div>
{% if section.type == "year_blocks" and section.year_entries %}
<ul class="employee-card__list">
{% for entry in section.year_entries %}
<li class="employee-card__list-item">{% if entry.year %}<strong>{{ entry.year }}:</strong> {% endif %}{{ entry.text }}</li>
{% endfor %}
</ul>
{% elif section.type == "publications" and section.publications %}
{% if section.publications_count %}<p class="employee-section__note">Всего: {{ section.publications_count }}</p>{% endif %}
<ul class="employee-card__list">
{% for publication in section.publications %}
<li class="employee-card__list-item">
{% if publication.url %}
<a class="admin__link" href="{{ publication.url }}">{{ publication.title }}</a>
{% else %}
{{ publication.title }}
{% endif %}
{% if publication.text and publication.text != publication.title %}<div class="employee-section__text">{{ publication.text }}</div>{% endif %}
</li>
{% endfor %}
</ul>
{% elif section.type == "courses_by_year" and section.courses %}
{% if section.academic_year %}<p class="employee-section__note">Учебный год: {{ section.academic_year }}</p>{% endif %}
<ul class="employee-card__list">
{% for course in section.courses %}
<li class="employee-card__list-item">
{% if course.url %}
<a class="admin__link" href="{{ course.url }}">{{ course.title }}</a>
{% else %}
{{ course.title }}
{% endif %}
</li>
{% endfor %}
</ul>
{% elif section.type == "table" and section.table %}
<div class="employee-section__table-wrap">
<table class="employee-section__table">
{% if section.table.headers %}
<thead><tr>{% for header in section.table.headers %}<th class="employee-section__head">{{ header }}</th>{% endfor %}</tr></thead>
{% endif %}
<tbody>
{% for row in section.table.rows %}
<tr>
{% for cell in row.cells %}
<td class="employee-section__cell">{{ cell }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
{% if section.paragraphs %}
{% for paragraph in section.paragraphs %}
<p class="employee-section__text">{{ paragraph }}</p>
{% endfor %}
{% endif %}
{% if section.items %}
<ul class="employee-card__list">
{% for item in section.items %}
<li class="employee-card__list-item">{{ item }}</li>
{% endfor %}
</ul>
{% endif %}
{% endif %}
{% if section.links and section.type not in ["courses_by_year"] %}
<div class="employee-section__links">
{% for link in section.links %}
<a class="employee-section__link" href="{{ link.url }}">{{ link.text }}</a>
{% endfor %}
</div>
{% endif %}
</article>
{% endfor %}
</div>
{% else %}
<p class="employee-section__text">Разделы профиля не найдены.</p>
{% endif %}
</section>
</section>
<section class="panel">
<h2 class="panel__title">Snapshots</h2>
<table class="table">

View File

@@ -1,3 +1,3 @@
APP_VERSION = "0.2.0"
FRONTEND_VERSION = "0.2.0"
BACKEND_VERSION = "0.2.0"
APP_VERSION = "0.2.3"
FRONTEND_VERSION = "0.2.3"
BACKEND_VERSION = "0.2.3"

View File

@@ -7,8 +7,6 @@ services:
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-miem_password}
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- "${POSTGRES_PORT:-5432}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-miem} -d ${POSTGRES_DB:-miem_workers}"]
interval: 10s
@@ -22,7 +20,7 @@ services:
environment:
DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers}
ports:
- "${API_PORT:-8000}:8000"
# Bind to loopback only, but keep the host port overridable via API_PORT.
- "127.0.0.1:${API_PORT:-8000}:8000"
depends_on:
postgres:
condition: service_healthy
@@ -44,7 +42,7 @@ services:
environment:
DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers}
ports:
- "${MCP_PORT:-8001}:8000"
# Bind to loopback only, but keep the host port overridable via MCP_PORT.
- "127.0.0.1:${MCP_PORT:-8001}:8000"
depends_on:
postgres:
condition: service_healthy

View File

@@ -1,7 +1,13 @@
from datetime import datetime, timezone
from app.models import CrawlRun, Employee
from app.services.admin_data import employee_display_payload, list_employees_page, run_payload, stats_payload
from app.services.admin_data import (
employee_detail_payload,
employee_display_payload,
list_employees_page,
run_payload,
stats_payload,
)
def test_employee_display_payload_extracts_common_fields(db_session):
@@ -31,6 +37,83 @@ def test_employee_display_payload_extracts_common_fields(db_session):
assert payload["courses_count"] == 1
def test_employee_detail_payload_normalizes_human_readable_sections(db_session):
    """Check contacts, external IDs and each section type in the detail payload."""
    contacts = {
        "emails": ["person@hse.ru"],
        "phones": ["+79990000000"],
        "address": "Moscow",
        "items": [{"raw": "consultation hours"}],
    }
    education = {
        "title": "Education",
        "type": "year_blocks",
        "year_entries": [{"year": 2020, "text": "Master degree"}],
    }
    publications = {
        "title": "Publications",
        "type": "publications",
        "publications": [{"title": "Paper", "text": "Paper details", "url": "https://example.test/paper"}],
    }
    courses = {
        "title": "Courses",
        "type": "courses_by_year",
        "academic_year": "2025/2026",
        "courses": [{"title": "Course", "url": "https://example.test/course"}],
    }
    # Only ``raw_text`` is set, so it must surface as the single paragraph.
    fallback = {
        "title": "Fallback",
        "type": "generic",
        "raw_text": "Fallback text",
    }
    employee = Employee(
        profile_key="staff:person",
        profile_type="staff",
        profile_id="person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={
            "positions": ["Professor"],
            "hse_start_year": 2024,
            "contacts": contacts,
            "external_ids": [{"system": "ORCID", "value": "0000", "url": "https://orcid.org/0000"}],
            "sections": [education, publications, courses, fallback],
        },
    )

    payload = employee_detail_payload(employee)
    sections = payload["sections"]

    assert payload["contacts"]["emails"] == ["person@hse.ru"]
    assert payload["contacts"]["items"] == ["consultation hours"]
    assert payload["external_ids"][0]["system"] == "ORCID"
    assert sections[0]["year_entries"][0]["text"] == "Master degree"
    assert sections[1]["publications"][0]["title"] == "Paper"
    assert sections[2]["courses"][0]["title"] == "Course"
    assert sections[3]["paragraphs"] == ["Fallback text"]
def test_employee_payloads_tolerate_malformed_current_data(db_session):
    """Both payload builders must return empty collections when ``current_data`` is not a dict."""
    broken = Employee(
        profile_key="staff:broken",
        canonical_url="https://www.hse.ru/staff/broken",
        full_name="Broken Data",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data="not-a-dict",
    )

    display_payload = employee_display_payload(broken)
    detail_payload = employee_detail_payload(broken)

    assert display_payload["positions"] == []
    assert display_payload["email_text"] == ""
    assert detail_payload["contacts"]["emails"] == []
    assert detail_payload["sections"] == []
def test_list_employees_page_filters_sorts_and_paginates(db_session):
db_session.add(
Employee(

View File

@@ -18,7 +18,7 @@ def test_health_returns_versions():
response = client.get("/api/health")
assert response.status_code == 200
assert response.json()["backend_version"] == "0.2.0"
assert response.json()["backend_version"] == "0.2.3"
def test_mcp_requires_token_and_lists_tools():

View File

@@ -0,0 +1,13 @@
from pathlib import Path
def test_employee_detail_template_is_human_readable():
    """The detail template must drop the raw-data markup and show the readable headings."""
    markup = Path("app/templates/employee_detail.html").read_text(encoding="utf-8")

    # Markers of the old raw dump layout that must be gone.
    removed_markers = ("Current data", "<pre class=\"code\"", ">Tabs<")
    # Headings the readable layout is expected to contain.
    expected_headings = ("Основная информация", "Контакты", "Разделы профиля", "Snapshots")

    for marker in removed_markers:
        assert marker not in markup
    for heading in expected_headings:
        assert heading in markup