# Files
# miem_workers/tests/test_api_mcp.py
#
# 533 lines
# 17 KiB
# Python

import json
from datetime import datetime, timezone
from types import SimpleNamespace
from fastapi.testclient import TestClient
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool
from app.config import Settings, get_settings
from app.db import Base, get_db
from app.main import app
from app.models import CrawlRun, CrawlRunEmployeeChange, Employee, EmployeePublication
from app.security import SESSION_COOKIE, sign_session
def test_health_returns_versions():
    """GET /api/health answers 200 and reports the expected backend version."""
    health = TestClient(app).get("/api/health")

    assert health.status_code == 200
    body = health.json()
    assert body["backend_version"] == "0.6.2"
def test_mcp_lists_tools_without_auth_and_ignores_auth_header():
    """`tools/list` on /mcp succeeds without credentials and ignores a bearer header.

    The same JSON-RPC request is sent once without and once with an
    ``Authorization`` header; both must return 200 and the same tool list.
    """
    # StaticPool keeps a single connection, so every session created below
    # talks to the same in-memory SQLite database.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    def override_db():
        session = Session()
        try:
            yield session
        finally:
            session.close()

    list_request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)
        without_auth = client.post("/mcp", json=list_request)
        with_auth = client.post(
            "/mcp",
            headers={"Authorization": "Bearer anything"},
            json=list_request,
        )

        assert without_auth.status_code == 200
        assert with_auth.status_code == 200

        tools = without_auth.json()["result"]["tools"]
        tool_names = {tool["name"] for tool in tools}
        assert "search_employees" in tool_names
        assert "get_service_info" in tool_names
        assert "sync_employees" in tool_names
        assert "get_crawl_run_details" in tool_names
        assert with_auth.json()["result"]["tools"] == tools
    finally:
        # Always drop the override, even when an assertion fails, so the
        # in-memory database does not leak into other tests.
        app.dependency_overrides.clear()
def test_mcp_search_employees_returns_matching_employee():
    """The `search_employees` MCP tool returns a seeded employee matching the query."""
    # StaticPool keeps a single connection, so the seeded row is visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    session = Session()
    session.add(
        Employee(
            profile_key="staff:avsergeev",
            profile_type="staff",
            profile_id="avsergeev",
            canonical_url="https://www.hse.ru/staff/avsergeev",
            full_name="Сергеев Алексей Викторович",
            status="active",
            first_seen_at=datetime.now(timezone.utc),
            last_seen_at=datetime.now(timezone.utc),
            current_data={"sections": []},
        )
    )
    session.commit()
    session.close()

    def override_db():
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)
        response = client.post(
            "/mcp",
            json={
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "search_employees", "arguments": {"query": "Сергеев"}},
            },
        )

        assert response.status_code == 200
        assert "Сергеев Алексей Викторович" in response.json()["result"]["content"][0]["text"]
    finally:
        # Always drop the override, even when an assertion fails.
        app.dependency_overrides.clear()
def test_mcp_service_info_returns_tools_and_dataset_hash():
    """The `get_service_info` MCP tool reports name, version, dataset hash and tools."""
    # StaticPool keeps a single connection, so the seeded row is visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    session = Session()
    session.add(
        Employee(
            profile_key="staff:alpha",
            profile_type="staff",
            profile_id="alpha",
            canonical_url="https://www.hse.ru/staff/alpha",
            full_name="Alpha Person",
            status="active",
            current_checksum="a" * 64,
            current_data={"sections": []},
        )
    )
    session.commit()
    session.close()

    def override_db():
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)
        response = client.post(
            "/mcp",
            json={
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "get_service_info", "arguments": {}},
            },
        )

        assert response.status_code == 200
        # The tool result carries its payload as a JSON document in the first
        # content item's text.
        payload = json.loads(response.json()["result"]["content"][0]["text"])
        assert payload["service_name"] == "miem-employees"
        assert payload["backend_version"] == "0.6.2"
        assert payload["dataset"]["hash"]
        assert any(tool["name"] == "sync_employees" for tool in payload["tools"])
    finally:
        # Always drop the override, even when an assertion fails.
        app.dependency_overrides.clear()
def test_mcp_list_employee_publications_prefers_stored_publications_with_fallback():
    """`list_employee_publications` prefers stored rows and falls back to profile JSON.

    Two employees are seeded, both with a publication embedded in
    ``current_data``. Only the "stored" employee also has an
    ``EmployeePublication`` row. The tool must return the row for that
    employee and the embedded publication for the other.
    """
    # StaticPool keeps a single connection, so the seeded rows are visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    session = Session()
    stored_employee = Employee(
        profile_key="staff:stored",
        profile_type="staff",
        profile_id="stored",
        canonical_url="https://www.hse.ru/staff/stored",
        full_name="Stored Person",
        status="active",
        current_data={
            "sections": [
                {
                    "type": "publications",
                    "publications": [{"title": "Old JSON Publication", "url": "https://example.test/old"}],
                }
            ]
        },
    )
    fallback_employee = Employee(
        profile_key="staff:fallback",
        profile_type="staff",
        profile_id="fallback",
        canonical_url="https://www.hse.ru/staff/fallback",
        full_name="Fallback Person",
        status="active",
        current_data={
            "sections": [
                {
                    "type": "publications",
                    "publications": [{"title": "Fallback Publication", "url": "https://example.test/fallback"}],
                }
            ]
        },
    )
    session.add_all([stored_employee, fallback_employee])
    # Commit first so stored_employee.id is populated for the publication row.
    session.commit()
    session.add(
        EmployeePublication(
            employee_id=stored_employee.id,
            publication_id="pub-1",
            title="Stored Publication",
            year=2024,
            publication_type="ARTICLE",
            url="https://publications.hse.ru/view/pub-1",
            doi_url="https://doi.org/10.1/test",
            citation_text="Stored Citation",
            annotation={"ru": "Аннотация", "en": "Abstract"},
            description={"main": "Stored Citation"},
            authors=[{"id": "1", "title_ru": "Автор", "is_current_employee": True}],
            source_hash="a" * 64,
        )
    )
    session.commit()
    session.close()

    def override_db():
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)

        def list_publications(request_id, profile):
            """Call the tool for *profile* and return its decoded JSON payload."""
            response = client.post(
                "/mcp",
                json={
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "method": "tools/call",
                    "params": {
                        "name": "list_employee_publications",
                        "arguments": {"profile_id_or_url": profile},
                    },
                },
            )
            return json.loads(response.json()["result"]["content"][0]["text"])

        stored_payload = list_publications(1, "stored")
        fallback_payload = list_publications(2, "fallback")

        stored_item = stored_payload["items"][0]
        assert stored_item["title"] == "Stored Publication"
        assert stored_item["doi_url"] == "https://doi.org/10.1/test"
        assert stored_item["annotation"] == {"ru": "Аннотация", "en": "Abstract"}
        assert stored_item["authors"] == [{"id": "1", "title_ru": "Автор", "is_current_employee": True}]
        assert fallback_payload["items"][0]["title"] == "Fallback Publication"
    finally:
        # Always drop the override, even when an assertion fails.
        app.dependency_overrides.clear()
def test_mcp_sync_employees_full_empty_and_unknown_hash_modes():
    """`sync_employees` returns full, empty-delta and unknown-hash responses.

    * no ``client_hash``: full dump;
    * ``client_hash`` equal to the current dataset hash: delta with no changes;
    * unrecognised ``client_hash``: full dump with reason ``unknown_client_hash``.
    """
    # StaticPool keeps a single connection, so the seeded row is visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    session = Session()
    session.add(
        Employee(
            profile_key="staff:alpha",
            profile_type="staff",
            profile_id="alpha",
            canonical_url="https://www.hse.ru/staff/alpha",
            full_name="Alpha Person",
            status="active",
            current_checksum="a" * 64,
            current_data={"sections": [{"type": "paragraphs"}]},
        )
    )
    session.commit()
    session.close()

    def override_db():
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)

        def sync(request_id, arguments):
            """Call `sync_employees` and return its decoded JSON payload."""
            response = client.post(
                "/mcp",
                json={
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "method": "tools/call",
                    "params": {"name": "sync_employees", "arguments": arguments},
                },
            )
            return json.loads(response.json()["result"]["content"][0]["text"])

        full_payload = sync(1, {})
        # Feed the hash from the full dump back in to get the "nothing changed" delta.
        current_hash = full_payload["to_hash"]
        empty_payload = sync(2, {"client_hash": current_hash})
        unknown_payload = sync(3, {"client_hash": "missing"})

        assert full_payload["mode"] == "full"
        assert full_payload["items"][0]["data"] == {"sections": [{"type": "paragraphs"}]}
        assert empty_payload["mode"] == "delta"
        assert empty_payload["changes"] == {"added": [], "updated": [], "dismissed": [], "removed": []}
        assert unknown_payload["mode"] == "full"
        assert unknown_payload["reason"] == "unknown_client_hash"
    finally:
        # Always drop the override, even when an assertion fails.
        app.dependency_overrides.clear()
def test_mcp_get_crawl_run_details_returns_changes():
    """`get_crawl_run_details` includes the per-employee changes recorded for a run."""
    # StaticPool keeps a single connection, so the seeded rows are visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    session = Session()
    run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=1)
    employee = Employee(
        profile_key="staff:new",
        profile_type="staff",
        profile_id="new",
        canonical_url="https://www.hse.ru/staff/new",
        full_name="New Person",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    session.add_all([run, employee])
    # Commit first so run.id and employee.id are populated for the change row.
    session.commit()
    session.add(
        CrawlRunEmployeeChange(
            crawl_run_id=run.id,
            employee_id=employee.id,
            profile_key=employee.profile_key,
            profile_url=employee.canonical_url,
            full_name=employee.full_name,
            change_type="new",
            profile_available=True,
            message="added",
        )
    )
    session.commit()
    # Read the id while the session is still open; `run` is detached after close().
    run_id = run.id
    session.close()

    def override_db():
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)
        response = client.post(
            "/mcp",
            json={
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "get_crawl_run_details", "arguments": {"run_id": run_id}},
            },
        )

        assert response.status_code == 200
        text = response.json()["result"]["content"][0]["text"]
        assert "New Person" in text
        assert "changes_detail_available" in text
    finally:
        # Always drop the override, even when an assertion fails.
        app.dependency_overrides.clear()
def test_mcp_protected_resource_metadata_route_is_removed():
    """The OAuth protected-resource metadata URL must answer 404."""
    metadata_response = TestClient(app).get("/.well-known/oauth-protected-resource")

    assert metadata_response.status_code == 404
def test_api_employees_and_stats_require_admin_session():
    """Admin-only API routes return data when a signed admin session cookie is sent.

    Covers ``/api/employees``, ``/api/stats`` and ``/api/crawl-runs/{id}``.
    NOTE(review): only the authenticated path is exercised here; the
    unauthenticated rejection implied by the test name is not asserted.
    """
    # StaticPool keeps a single connection, so the seeded rows are visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    db = Session()
    employee = Employee(
        profile_key="staff:alpha",
        profile_type="staff",
        profile_id="alpha",
        canonical_url="https://www.hse.ru/staff/alpha",
        full_name="Alpha Person",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": ["alpha@hse.ru"]}, "sections": []},
    )
    db.add(employee)
    run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=1)
    db.add(run)
    db.commit()
    db.add(
        CrawlRunEmployeeChange(
            crawl_run_id=run.id,
            # Use the real primary key instead of assuming the first row gets id 1.
            employee_id=employee.id,
            profile_key="staff:alpha",
            profile_url="https://www.hse.ru/staff/alpha",
            full_name="Alpha Person",
            change_type="new",
            profile_available=True,
            message="added",
        )
    )
    db.commit()
    # Read the id while the session is still open; `run` is detached after close().
    run_id = run.id
    db.close()

    settings = Settings(admin_username="admin", admin_password="password", session_secret="session-secret")

    def override_db():
        session = Session()
        try:
            yield session
        finally:
            session.close()

    app.dependency_overrides[get_db] = override_db
    app.dependency_overrides[get_settings] = lambda: settings
    try:
        client = TestClient(app)
        # Sign the cookie with the same settings the app is overridden to use.
        client.cookies.set(SESSION_COOKIE, sign_session("admin", settings))

        employees = client.get("/api/employees", params={"q": "Alpha", "has_email": True})
        stats = client.get("/api/stats")
        run_details = client.get(f"/api/crawl-runs/{run_id}")

        assert employees.status_code == 200
        assert employees.json()["total"] == 1
        assert stats.status_code == 200
        assert stats.json()["new_in_last_run"] == 1
        assert run_details.status_code == 200
        assert run_details.json()["changes"]["new"][0]["full_name"] == "Alpha Person"
    finally:
        # Always drop both overrides, even when an assertion fails.
        app.dependency_overrides.clear()
def test_admin_refresh_employee_route_updates_only_requested_employee(monkeypatch):
    """POST /admin/employees/{id}/refresh refreshes that employee once and redirects.

    ``app.admin.refresh_employee`` is replaced with a recording fake, so the
    test checks which employee id and settings the route passes and the 303
    redirect it returns.
    """
    # StaticPool keeps a single connection, so the seeded row is visible to
    # the sessions opened by the request handler.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    db = Session()
    db.add(
        Employee(
            profile_key="org_person:133709486",
            profile_type="org_person",
            profile_id="133709486",
            canonical_url="https://www.hse.ru/org/persons/133709486",
            full_name="Будков Юрий Алексеевич",
            status="active",
        )
    )
    db.commit()
    employee_id = db.scalar(select(Employee.id))
    db.close()

    settings = Settings(admin_username="admin", admin_password="password", session_secret="session-secret")

    def override_db():
        session = Session()
        try:
            yield session
        finally:
            session.close()

    calls = []

    def fake_refresh_employee(db, refreshed_employee, route_settings):
        # Record what the route passed in; the fake result only exposes `status`.
        calls.append((refreshed_employee.id, route_settings))
        return SimpleNamespace(status="completed")

    app.dependency_overrides[get_db] = override_db
    app.dependency_overrides[get_settings] = lambda: settings
    try:
        monkeypatch.setattr("app.admin.refresh_employee", fake_refresh_employee)
        client = TestClient(app)
        # Sign the cookie with the same settings the app is overridden to use.
        client.cookies.set(SESSION_COOKIE, sign_session("admin", settings))

        response = client.post(f"/admin/employees/{employee_id}/refresh", follow_redirects=False)

        assert response.status_code == 303
        assert response.headers["location"] == f"/admin/employees/{employee_id}?refresh_status=success"
        assert calls == [(employee_id, settings)]
    finally:
        # Always drop both overrides, even when an assertion or the patch fails.
        app.dependency_overrides.clear()