feat: add dataset checkpoint sync for MCP
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from types import SimpleNamespace
|
||||
|
||||
@@ -19,7 +20,7 @@ def test_health_returns_versions():
|
||||
response = client.get("/api/health")
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.json()["backend_version"] == "0.4.7"
|
||||
assert response.json()["backend_version"] == "0.5.0"
|
||||
|
||||
|
||||
def test_mcp_lists_tools_without_auth_and_ignores_auth_header():
|
||||
@@ -50,7 +51,10 @@ def test_mcp_lists_tools_without_auth_and_ignores_auth_header():
|
||||
|
||||
assert without_auth.status_code == 200
|
||||
assert with_auth.status_code == 200
|
||||
assert without_auth.json()["result"]["tools"][0]["name"] == "search_employees"
|
||||
tool_names = {tool["name"] for tool in without_auth.json()["result"]["tools"]}
|
||||
assert "search_employees" in tool_names
|
||||
assert "get_service_info" in tool_names
|
||||
assert "sync_employees" in tool_names
|
||||
assert any(tool["name"] == "get_crawl_run_details" for tool in without_auth.json()["result"]["tools"])
|
||||
assert with_auth.json()["result"]["tools"] == without_auth.json()["result"]["tools"]
|
||||
|
||||
@@ -108,6 +112,128 @@ def test_mcp_search_employees_returns_matching_employee():
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def test_mcp_service_info_returns_tools_and_dataset_hash():
    """Check the payload returned by the ``get_service_info`` MCP tool.

    Seeds one employee into a fresh in-memory SQLite database, calls the tool
    through ``POST /mcp`` and verifies that the JSON text payload carries the
    service name, the backend version, a non-empty dataset hash and a tool
    list that includes ``sync_employees``.
    """
    # NOTE(review): StaticPool + check_same_thread=False is the usual recipe for
    # sharing one in-memory SQLite connection between the test and the app.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    session.add(
        Employee(
            profile_key="staff:alpha",
            profile_type="staff",
            profile_id="alpha",
            canonical_url="https://www.hse.ru/staff/alpha",
            full_name="Alpha Person",
            status="active",
            current_checksum="a" * 64,
            current_data={"sections": []},
        )
    )
    session.commit()
    session.close()

    def override_db():
        # Hand the app a session bound to the in-memory engine; always close it.
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)

        response = client.post(
            "/mcp",
            json={
                "jsonrpc": "2.0",
                "id": 1,
                "method": "tools/call",
                "params": {"name": "get_service_info", "arguments": {}},
            },
        )

        assert response.status_code == 200
        # The tool result is a JSON document embedded as text in the first content item.
        payload = json.loads(response.json()["result"]["content"][0]["text"])
        assert payload["service_name"] == "miem-employees"
        assert payload["backend_version"] == "0.5.0"
        assert payload["dataset"]["hash"]
        assert any(tool["name"] == "sync_employees" for tool in payload["tools"])
    finally:
        # Reset overrides even when an assertion fails, so the override does
        # not leak into other tests that share the same ``app`` object.
        app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def test_mcp_sync_employees_full_empty_and_unknown_hash_modes():
    """Exercise the ``sync_employees`` MCP tool in three client-hash situations.

    As pinned by the assertions below:

    * no ``client_hash``: ``mode == "full"`` and the employee data is in ``items``;
    * ``client_hash`` equal to the returned ``to_hash``: ``mode == "delta"``
      with every change list empty;
    * an unknown ``client_hash``: ``mode == "full"`` with
      ``reason == "unknown_client_hash"``.
    """
    # NOTE(review): StaticPool + check_same_thread=False is the usual recipe for
    # sharing one in-memory SQLite connection between the test and the app.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    session.add(
        Employee(
            profile_key="staff:alpha",
            profile_type="staff",
            profile_id="alpha",
            canonical_url="https://www.hse.ru/staff/alpha",
            full_name="Alpha Person",
            status="active",
            current_checksum="a" * 64,
            current_data={"sections": [{"type": "paragraphs"}]},
        )
    )
    session.commit()
    session.close()

    def override_db():
        # Hand the app a session bound to the in-memory engine; always close it.
        db = Session()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    try:
        client = TestClient(app)

        def call_sync(request_id, arguments):
            # Send one JSON-RPC ``tools/call`` for ``sync_employees`` and decode
            # the JSON document embedded as text in the first content item.
            response = client.post(
                "/mcp",
                json={
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "method": "tools/call",
                    "params": {"name": "sync_employees", "arguments": arguments},
                },
            )
            return json.loads(response.json()["result"]["content"][0]["text"])

        full_payload = call_sync(1, {})
        current_hash = full_payload["to_hash"]
        empty_payload = call_sync(2, {"client_hash": current_hash})
        unknown_payload = call_sync(3, {"client_hash": "missing"})

        assert full_payload["mode"] == "full"
        assert full_payload["items"][0]["data"] == {"sections": [{"type": "paragraphs"}]}
        assert empty_payload["mode"] == "delta"
        assert empty_payload["changes"] == {"added": [], "updated": [], "dismissed": [], "removed": []}
        assert unknown_payload["mode"] == "full"
        assert unknown_payload["reason"] == "unknown_client_hash"
    finally:
        # Reset overrides even when an assertion fails, so the override does
        # not leak into other tests that share the same ``app`` object.
        app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def test_mcp_get_crawl_run_details_returns_changes():
|
||||
engine = create_engine(
|
||||
"sqlite:///:memory:",
|
||||
|
||||
88
tests/test_dataset_versions.py
Normal file
88
tests/test_dataset_versions.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import Employee
|
||||
from app.services.dataset_versions import get_or_create_current_version, sync_employees_payload
|
||||
|
||||
|
||||
def _employee(profile_key: str, checksum: str, *, status: str = "active") -> Employee:
    """Build an unsaved ``Employee`` whose fields are derived from *profile_key*.

    ``profile_key`` is expected in ``"<type>:<id>"`` form; the part before the
    first colon becomes ``profile_type`` and the rest becomes ``profile_id``.
    The key is also reused as the full name, inside the canonical URL and in
    ``current_data``. ``checksum`` is stored as ``current_checksum``.
    """
    # Split once on the first colon; indexing below still raises IndexError
    # for a key without a colon.
    key_parts = profile_key.split(":", 1)
    fields = {
        "profile_key": profile_key,
        "profile_type": key_parts[0],
        "profile_id": key_parts[1],
        "canonical_url": f"https://www.hse.ru/{profile_key}",
        "full_name": profile_key,
        "status": status,
        # Two separate clock reads, so the timestamps may differ slightly.
        "first_seen_at": datetime.now(timezone.utc),
        "last_seen_at": datetime.now(timezone.utc),
        "current_data": {"profile_key": profile_key},
        "current_checksum": checksum,
    }
    return Employee(**fields)
|
||||
|
||||
|
||||
def test_dataset_version_hash_is_stable_for_same_employee_state(db_session):
    """Requesting the current version twice without data changes yields the same row."""
    seeded = _employee("staff:alpha", "a" * 64)
    db_session.add(seeded)
    db_session.commit()

    initial_version = get_or_create_current_version(db_session)
    db_session.commit()
    # No employee changed in between, so the second lookup should reuse the first version.
    repeated_version = get_or_create_current_version(db_session)

    assert repeated_version.id == initial_version.id
    assert repeated_version.hash == initial_version.hash
    assert repeated_version.employee_count == 1
|
||||
|
||||
|
||||
def test_dataset_version_hash_changes_when_employee_checksum_changes(db_session):
    """Changing an employee checksum produces a new version linked to the previous hash."""
    person = _employee("staff:alpha", "a" * 64)
    db_session.add(person)
    db_session.commit()
    version_before = get_or_create_current_version(db_session)
    db_session.commit()

    # Mutate only the checksum, then ask for the current version again.
    person.current_checksum = "b" * 64
    db_session.commit()
    version_after = get_or_create_current_version(db_session)

    assert version_after.hash != version_before.hash
    assert version_after.previous_hash == version_before.hash
|
||||
|
||||
|
||||
def test_sync_employees_diff_spans_multiple_intermediate_versions(db_session):
    """A delta from the first version covers changes made across later versions.

    After the first version is recorded, ``staff:beta`` is added (and a second
    version recorded) and ``staff:alpha`` is modified. Syncing from the first
    hash must report beta as added and alpha as updated in a single delta.
    """
    # Version 1: only alpha exists.
    alpha_employee = _employee("staff:alpha", "a" * 64)
    db_session.add(alpha_employee)
    db_session.commit()
    baseline = get_or_create_current_version(db_session)
    db_session.commit()

    # Version 2: beta is added and an intermediate version is recorded.
    beta_employee = _employee("staff:beta", "b" * 64)
    db_session.add(beta_employee)
    db_session.commit()
    get_or_create_current_version(db_session)
    db_session.commit()

    # Alpha changes after version 2; no version is recorded explicitly here.
    alpha_employee.current_checksum = "c" * 64
    alpha_employee.current_data = {"profile_key": "staff:alpha", "changed": True}
    db_session.commit()

    payload = sync_employees_payload(db_session, client_hash=baseline.hash, include_data=False)

    assert payload["mode"] == "delta"
    added_keys = [entry["profile_key"] for entry in payload["changes"]["added"]]
    assert added_keys == ["staff:beta"]
    updated_keys = [entry["profile_key"] for entry in payload["changes"]["updated"]]
    assert updated_keys == ["staff:alpha"]
    assert payload["changes"]["dismissed"] == []
    assert payload["changes"]["removed"] == []
|
||||
|
||||
|
||||
def test_sync_employees_reports_dismissed_as_tombstone(db_session):
    """An employee whose status becomes ``dismissed`` appears in the ``dismissed`` change list."""
    person = _employee("staff:alpha", "a" * 64)
    db_session.add(person)
    db_session.commit()
    baseline = get_or_create_current_version(db_session)
    db_session.commit()

    # Flip the status after the baseline version was recorded.
    person.status = "dismissed"
    db_session.commit()
    payload = sync_employees_payload(db_session, client_hash=baseline.hash, include_data=False)

    tombstone = payload["changes"]["dismissed"][0]
    assert tombstone["profile_key"] == "staff:alpha"
    assert tombstone["status"] == "dismissed"
|
||||
Reference in New Issue
Block a user