feature: add MIEM employees parser service with admin UI and MCP
This commit is contained in:
23
tests/conftest.py
Normal file
23
tests/conftest.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
@pytest.fixture()
def db_session():
    """Yield a SQLAlchemy session bound to a fresh in-memory SQLite database.

    All tables registered on ``Base.metadata`` are created before the test
    runs. Afterwards the session is closed, the tables are dropped and the
    engine is disposed so no connection outlives the test.
    """
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        # StaticPool keeps a single shared connection, so every session made
        # from this engine talks to the same in-memory database.
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        yield session
    finally:
        try:
            session.close()
            Base.metadata.drop_all(engine)
        finally:
            # Release the pooled connection even if closing/dropping failed.
            engine.dispose()
|
||||
107
tests/test_api_mcp.py
Normal file
107
tests/test_api_mcp.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.config import Settings, get_settings
|
||||
from app.db import Base, get_db
|
||||
from app.main import app
|
||||
from app.models import Employee
|
||||
|
||||
|
||||
def test_health_returns_versions():
    """GET /api/health answers 200 and reports backend version 0.1.0."""
    health = TestClient(app).get("/api/health")

    assert health.status_code == 200
    payload = health.json()
    assert payload["backend_version"] == "0.1.0"
|
||||
|
||||
|
||||
def test_mcp_requires_token_and_lists_tools():
    """POST /mcp rejects a request without a token and lists tools with one.

    The app's ``get_db`` and ``get_settings`` dependencies are overridden so
    the request runs against an empty in-memory SQLite database with the MCP
    token set to ``"secret"``.
    """
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    try:
        Base.metadata.create_all(engine)
        Session = sessionmaker(bind=engine)

        def override_db():
            session = Session()
            try:
                yield session
            finally:
                session.close()

        app.dependency_overrides[get_db] = override_db
        app.dependency_overrides[get_settings] = lambda: Settings(mcp_token="secret", session_secret="session-secret")
        client = TestClient(app)

        unauthorized = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}})
        authorized = client.post(
            "/mcp",
            headers={"Authorization": "Bearer secret"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
        )

        assert unauthorized.status_code == 401
        assert authorized.status_code == 200
        assert authorized.json()["result"]["tools"][0]["name"] == "search_employees"
    finally:
        # Always undo the overrides: ``app`` is a module-level object shared
        # by every test, so a failed assertion must not leak them onward.
        app.dependency_overrides.clear()
        engine.dispose()
|
||||
|
||||
|
||||
def test_mcp_search_employees_returns_matching_employee():
    """The ``search_employees`` MCP tool returns a seeded employee by name.

    One active employee is inserted into an in-memory SQLite database, then
    ``tools/call`` is issued through POST /mcp with the query ``"Сергеев"``.
    The first content item of the result must contain the full name.
    """
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    try:
        Base.metadata.create_all(engine)
        Session = sessionmaker(bind=engine)

        # Seed through a separate session that is closed even if the insert
        # fails; requests get their own sessions via override_db.
        session = Session()
        try:
            session.add(
                Employee(
                    profile_key="staff:avsergeev",
                    profile_type="staff",
                    profile_id="avsergeev",
                    canonical_url="https://www.hse.ru/staff/avsergeev",
                    full_name="Сергеев Алексей Викторович",
                    status="active",
                    first_seen_at=datetime.now(timezone.utc),
                    last_seen_at=datetime.now(timezone.utc),
                    current_data={"sections": []},
                )
            )
            session.commit()
        finally:
            session.close()

        def override_db():
            db = Session()
            try:
                yield db
            finally:
                db.close()

        app.dependency_overrides[get_db] = override_db
        app.dependency_overrides[get_settings] = lambda: Settings(mcp_token="secret", session_secret="session-secret")
        client = TestClient(app)

        response = client.post(
            "/mcp",
            headers={"Authorization": "Bearer secret"},
            json={
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "search_employees", "arguments": {"query": "Сергеев"}},
            },
        )

        assert response.status_code == 200
        assert "Сергеев Алексей Викторович" in response.json()["result"]["content"][0]["text"]
    finally:
        # Always undo the overrides: ``app`` is a module-level object shared
        # by every test, so a failed assertion must not leak them onward.
        app.dependency_overrides.clear()
        engine.dispose()
|
||||
34
tests/test_crawler.py
Normal file
34
tests/test_crawler.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import Employee
|
||||
from app.services.crawler import _mark_dismissed
|
||||
|
||||
|
||||
def test_mark_dismissed_only_marks_missing_active_employees(db_session):
    """``_mark_dismissed`` dismisses only the employee absent from the key set.

    Two active employees are stored; only ``staff:kept`` is passed to
    ``_mark_dismissed``. The call must report one dismissal, leave the kept
    employee active, and set ``status``/``dismissed_at`` on the other.
    """
    kept_employee = Employee(
        profile_key="staff:kept",
        canonical_url="https://www.hse.ru/staff/kept",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    gone_employee = Employee(
        profile_key="staff:gone",
        canonical_url="https://www.hse.ru/staff/gone",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    db_session.add_all([kept_employee, gone_employee])
    db_session.commit()

    # presumably the set holds the profile keys seen by the latest crawl
    present_keys = {"staff:kept"}
    dismissed = _mark_dismissed(db_session, present_keys)

    def fetch(profile_key):
        # Re-query by key so the assertions read what the session reports.
        return db_session.query(Employee).filter_by(profile_key=profile_key).one()

    assert dismissed == 1
    assert fetch("staff:kept").status == "active"
    gone = fetch("staff:gone")
    assert gone.status == "dismissed"
    assert gone.dismissed_at is not None
|
||||
28
tests/test_parser.py
Normal file
28
tests/test_parser.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.parser.profile import extract_person_tabs
|
||||
from app.parser.profile_url import normalize_profile_url, parse_profile_identity
|
||||
|
||||
|
||||
def test_normalize_profile_url_supports_staff_and_org_persons():
    """Profile URLs normalize to absolute form and yield a (type, id) pair."""
    # A relative staff link with a fragment becomes an absolute URL without it.
    normalized_staff = normalize_profile_url("/staff/avsergeev#sci")
    assert normalized_staff == "https://www.hse.ru/staff/avsergeev"

    # An absolute org/persons link comes back without its trailing slash.
    normalized_org = normalize_profile_url("https://www.hse.ru/org/persons/123/")
    assert normalized_org == "https://www.hse.ru/org/persons/123"

    identity = parse_profile_identity("https://www.hse.ru/staff/avsergeev")
    assert identity == ("staff", "avsergeev")
|
||||
|
||||
|
||||
def test_extract_person_tabs_prefers_person_menu_addition():
    """Tabs come from the ``person-menu-addition`` block, not from other links.

    The markup holds two anchors inside the person menu and one unrelated
    ``/org/persons/999`` link outside it. Only the two menu entries are
    expected, and the fragment href must be resolved against the profile URL.
    """
    markup = """
        <div class="person-menu is-desktop small person-menu-addition">
          <a href="#main">Домашняя страница</a>
          <a href="#sci" data-index="1">Публикации</a>
        </div>
        <a href="/org/persons/999">Other person</a>
        """
    soup = BeautifulSoup(markup, "html.parser")

    tabs = extract_person_tabs(soup, "https://www.hse.ru/staff/avsergeev")

    titles = [tab["title"] for tab in tabs]
    assert titles == ["Домашняя страница", "Публикации"]
    publications_tab = tabs[1]
    assert publications_tab["href"] == "https://www.hse.ru/staff/avsergeev#sci"
|
||||
Reference in New Issue
Block a user