Compare commits

...

5 Commits

15 changed files with 30 additions and 395 deletions

View File

@@ -14,13 +14,5 @@ PARSER_USE_PLAYWRIGHT=false
ADMIN_USERNAME=admin ADMIN_USERNAME=admin
ADMIN_PASSWORD=change-me ADMIN_PASSWORD=change-me
SESSION_SECRET=change-me-session-secret SESSION_SECRET=change-me-session-secret
MCP_TOKEN=change-me-mcp-token
MCP_AUTH_MODE=oauth
MCP_RESOURCE_URL=http://localhost:8001/mcp
MCP_OAUTH_ISSUER=
MCP_OAUTH_AUDIENCE=
MCP_OAUTH_JWKS_URL=
MCP_OAUTH_REQUIRED_SCOPE=mcp:tools
API_PORT=8000 API_PORT=8000
MCP_PORT=8001 MCP_PORT=8001

View File

@@ -6,7 +6,7 @@
- `api`: FastAPI, REST API, HTML-админка, healthcheck. - `api`: FastAPI, REST API, HTML-админка, healthcheck.
- `worker`: weekly scheduler, который запускает парсинг по `CRAWL_CRON`. - `worker`: weekly scheduler, который запускает парсинг по `CRAWL_CRON`.
- `mcp`: HTTP MCP endpoint с OAuth/OIDC access token для внешних агентов или legacy static token для локального режима. - `mcp`: открытый HTTP MCP endpoint для ИИ-агентов.
- `postgres`: основная БД. - `postgres`: основная БД.
Парсер использует фиксированный источник сотрудников, по умолчанию `https://miem.hse.ru/persons`. Для каждой карточки сохраняются ФИО, должности, год начала работы, контакты, идентификаторы, вкладки профиля, секции, публикации, курсы, ВКР, JSON-снапшот и сжатый HTML-снапшот. Ссылки обходятся только из меню профиля самого сотрудника (`person-menu`), например `#sci`, `#teaching`, `#main`. Парсер использует фиксированный источник сотрудников, по умолчанию `https://miem.hse.ru/persons`. Для каждой карточки сохраняются ФИО, должности, год начала работы, контакты, идентификаторы, вкладки профиля, секции, публикации, курсы, ВКР, JSON-снапшот и сжатый HTML-снапшот. Ссылки обходятся только из меню профиля самого сотрудника (`person-menu`), например `#sci`, `#teaching`, `#main`.
@@ -27,13 +27,6 @@ cp .env.example .env
- `CRAWL_LIMIT`: опциональный лимит профилей для тестового запуска. - `CRAWL_LIMIT`: опциональный лимит профилей для тестового запуска.
- `ADMIN_USERNAME`, `ADMIN_PASSWORD`: логин и пароль админки. - `ADMIN_USERNAME`, `ADMIN_PASSWORD`: логин и пароль админки.
- `SESSION_SECRET`: секрет подписи cookie. - `SESSION_SECRET`: секрет подписи cookie.
- `MCP_TOKEN`: статический bearer token для legacy/local режима `MCP_AUTH_MODE=token`.
- `MCP_AUTH_MODE`: режим авторизации MCP: `oauth` для внешних агентов или `token` для локальной отладки.
- `MCP_RESOURCE_URL`: публичный URL MCP endpoint, например `https://example.com/mcp`.
- `MCP_OAUTH_ISSUER`: issuer внешнего OIDC-провайдера.
- `MCP_OAUTH_AUDIENCE`: ожидаемый `aud` в OAuth access token.
- `MCP_OAUTH_JWKS_URL`: JWKS endpoint; если не задан, используется `<issuer>/.well-known/jwks.json`.
- `MCP_OAUTH_REQUIRED_SCOPE`: scope для доступа к MCP tools, по умолчанию `mcp:tools`.
- `PARSER_USE_PLAYWRIGHT`: включение Playwright-рендера динамических вкладок. - `PARSER_USE_PLAYWRIGHT`: включение Playwright-рендера динамических вкладок.
## Локальный запуск ## Локальный запуск
@@ -88,9 +81,7 @@ curl -X POST http://localhost:8000/api/crawl-runs --cookie "miem_admin_session=.
## MCP ## MCP
Endpoint: `POST /mcp`, авторизация `Authorization: Bearer <token>`. Endpoint: `POST /mcp`, без авторизации на уровне приложения.
Для внешних ИИ-агентов используйте `MCP_AUTH_MODE=oauth`. В этом режиме статический `MCP_TOKEN` не принимается: клиент должен передать OAuth/OIDC access token с нужным scope.
Поддерживаемые tools: Поддерживаемые tools:
@@ -104,23 +95,11 @@ Endpoint: `POST /mcp`, авторизация `Authorization: Bearer <token>`.
```bash ```bash
curl http://localhost:8001/mcp \ curl http://localhost:8001/mcp \
-H "Authorization: Bearer change-me-mcp-token" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}'
``` ```
Для production OAuth/OIDC настройте внешний authorization server и включите режим `oauth`: Если MCP нужно ограничить, делайте это на сетевом уровне: localhost binding, VPN, firewall, reverse proxy или другой внешний контур доступа.
```env
MCP_AUTH_MODE=oauth
MCP_RESOURCE_URL=https://example.com/mcp
MCP_OAUTH_ISSUER=https://auth.example.com
MCP_OAUTH_AUDIENCE=miem-mcp
MCP_OAUTH_JWKS_URL=https://auth.example.com/.well-known/jwks.json
MCP_OAUTH_REQUIRED_SCOPE=mcp:tools
```
MCP server работает как OAuth protected resource: он не выдает токены, а проверяет JWT access token по JWKS, `issuer`, `audience`, сроку действия и scope. Metadata для MCP-клиентов доступна по `GET /.well-known/oauth-protected-resource`.
## Обслуживание ## Обслуживание
@@ -131,4 +110,4 @@ docker compose exec postgres pg_dump -U miem miem_workers > backup.sql
docker compose down docker compose down
``` ```
Версия сервиса: `0.4.0`. Админка всегда показывает версии backend и frontend в footer. Версия сервиса: `0.4.5`. Админка всегда показывает версии backend и frontend в footer.

View File

@@ -1,6 +1,4 @@
from functools import lru_cache from functools import lru_cache
from typing import Literal
from pydantic import Field, field_validator from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -19,13 +17,6 @@ class Settings(BaseSettings):
admin_username: str = "admin" admin_username: str = "admin"
admin_password: str = "admin" admin_password: str = "admin"
session_secret: str = Field(default="dev-session-secret", min_length=8) session_secret: str = Field(default="dev-session-secret", min_length=8)
mcp_token: str = "dev-mcp-token"
mcp_auth_mode: Literal["token", "oauth"] = "oauth"
mcp_resource_url: str = "http://localhost:8001/mcp"
mcp_oauth_issuer: str = ""
mcp_oauth_audience: str = ""
mcp_oauth_jwks_url: str = ""
mcp_oauth_required_scope: str = "mcp:tools"
@field_validator("crawl_limit", mode="before") @field_validator("crawl_limit", mode="before")
@classmethod @classmethod
@@ -34,15 +25,6 @@ class Settings(BaseSettings):
return None return None
return value return value
def oauth_jwks_url(self) -> str:
if self.mcp_oauth_jwks_url:
return self.mcp_oauth_jwks_url
issuer = self.mcp_oauth_issuer.rstrip("/")
if not issuer:
return ""
return f"{issuer}/.well-known/jwks.json"
@lru_cache @lru_cache
def get_settings() -> Settings: def get_settings() -> Settings:
return Settings() return Settings()

View File

@@ -4,7 +4,6 @@ from fastapi.staticfiles import StaticFiles
from app.admin import router as admin_router from app.admin import router as admin_router
from app.api import router as api_router from app.api import router as api_router
from app.db import init_db from app.db import init_db
from app.mcp import metadata_router as mcp_metadata_router
from app.mcp import router as mcp_router from app.mcp import router as mcp_router
from app.version import BACKEND_VERSION from app.version import BACKEND_VERSION
@@ -13,7 +12,6 @@ app.mount("/static", StaticFiles(directory="app/static"), name="static")
app.include_router(api_router) app.include_router(api_router)
app.include_router(admin_router) app.include_router(admin_router)
app.include_router(mcp_router) app.include_router(mcp_router)
app.include_router(mcp_metadata_router)
@app.on_event("startup") @app.on_event("startup")

View File

@@ -4,15 +4,12 @@ from fastapi import APIRouter, Depends, Request
from sqlalchemy import desc, or_, select from sqlalchemy import desc, or_, select
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.config import Settings, get_settings
from app.db import get_db from app.db import get_db
from app.models import CrawlRun, Employee from app.models import CrawlRun, Employee
from app.security import mcp_protected_resource_metadata, require_mcp_auth
from app.services.admin_data import run_detail_payload from app.services.admin_data import run_detail_payload
from app.version import BACKEND_VERSION from app.version import BACKEND_VERSION
router = APIRouter(prefix="/mcp") router = APIRouter(prefix="/mcp")
metadata_router = APIRouter()
TOOLS = [ TOOLS = [
@@ -65,9 +62,7 @@ TOOLS = [
async def mcp_http( async def mcp_http(
request: Request, request: Request,
db: Session = Depends(get_db), db: Session = Depends(get_db),
settings: Settings = Depends(get_settings),
) -> dict: ) -> dict:
require_mcp_auth(request, settings)
payload = await request.json() payload = await request.json()
method = payload.get("method") method = payload.get("method")
request_id = payload.get("id") request_id = payload.get("id")
@@ -183,8 +178,3 @@ def _run_payload(run: CrawlRun) -> dict:
def _tool_response(data: object) -> dict: def _tool_response(data: object) -> dict:
return {"content": [{"type": "text", "text": json.dumps(data, ensure_ascii=False, default=str)}]} return {"content": [{"type": "text", "text": json.dumps(data, ensure_ascii=False, default=str)}]}
@metadata_router.get("/.well-known/oauth-protected-resource")
def oauth_protected_resource(settings: Settings = Depends(get_settings)) -> dict:
return mcp_protected_resource_metadata(settings)

View File

@@ -3,10 +3,7 @@ import hashlib
import hmac import hmac
import json import json
import time import time
from functools import lru_cache
import jwt
from jwt import PyJWKClient, PyJWTError
from fastapi import HTTPException, Request, status from fastapi import HTTPException, Request, status
from app.config import Settings from app.config import Settings
@@ -47,93 +44,3 @@ def require_admin(request: Request, settings: Settings) -> str:
if not username: if not username:
raise HTTPException(status_code=status.HTTP_303_SEE_OTHER, headers={"Location": "/admin/login"}) raise HTTPException(status_code=status.HTTP_303_SEE_OTHER, headers={"Location": "/admin/login"})
return username return username
def require_mcp_auth(request: Request, settings: Settings) -> None:
auth = request.headers.get("authorization", "")
if not auth.startswith("Bearer "):
raise _mcp_unauthorized(settings, "Missing bearer token")
token = auth.removeprefix("Bearer ").strip()
if _mcp_static_token_allowed(settings) and hmac.compare_digest(token, settings.mcp_token):
return
if _mcp_oauth_allowed(settings):
_validate_mcp_oauth_token(token, settings)
return
raise _mcp_unauthorized(settings, "Invalid MCP token")
def require_mcp_token(request: Request, settings: Settings) -> None:
require_mcp_auth(request, settings)
def mcp_protected_resource_metadata(settings: Settings) -> dict:
authorization_servers = [settings.mcp_oauth_issuer.rstrip("/")] if settings.mcp_oauth_issuer else []
return {
"resource": settings.mcp_resource_url,
"authorization_servers": authorization_servers,
"bearer_methods_supported": ["header"],
"scopes_supported": [settings.mcp_oauth_required_scope],
"resource_documentation": settings.mcp_resource_url,
}
def _mcp_static_token_allowed(settings: Settings) -> bool:
return settings.mcp_auth_mode == "token"
def _mcp_oauth_allowed(settings: Settings) -> bool:
return settings.mcp_auth_mode == "oauth"
def _validate_mcp_oauth_token(token: str, settings: Settings) -> None:
if not settings.mcp_oauth_issuer or not settings.mcp_oauth_audience or not settings.oauth_jwks_url():
raise _mcp_unauthorized(settings, "MCP OAuth is not configured")
try:
signing_key = _get_mcp_oauth_signing_key(token, settings).key
claims = jwt.decode(
token,
signing_key,
algorithms=["RS256", "RS384", "RS512", "ES256", "ES384", "ES512"],
audience=settings.mcp_oauth_audience,
issuer=settings.mcp_oauth_issuer.rstrip("/"),
)
except PyJWTError as exc:
raise _mcp_unauthorized(settings, "Invalid OAuth access token") from exc
if not _claims_have_scope(claims, settings.mcp_oauth_required_scope):
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Missing required MCP OAuth scope")
def _claims_have_scope(claims: dict, required_scope: str) -> bool:
scopes: set[str] = set()
scope = claims.get("scope")
if isinstance(scope, str):
scopes.update(scope.split())
scp = claims.get("scp")
if isinstance(scp, str):
scopes.update(scp.split())
elif isinstance(scp, list):
scopes.update(str(item) for item in scp)
return required_scope in scopes
@lru_cache(maxsize=16)
def _get_jwk_client(jwks_url: str) -> PyJWKClient:
return PyJWKClient(jwks_url)
def _get_mcp_oauth_signing_key(token: str, settings: Settings):
return _get_jwk_client(settings.oauth_jwks_url()).get_signing_key_from_jwt(token)
def _mcp_unauthorized(settings: Settings, detail: str) -> HTTPException:
headers = {}
if _mcp_oauth_allowed(settings):
headers["WWW-Authenticate"] = f'Bearer resource_metadata="{_mcp_metadata_url(settings)}"'
return HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=detail, headers=headers)
def _mcp_metadata_url(settings: Settings) -> str:
resource_url = settings.mcp_resource_url.rstrip("/")
base_url = resource_url[: -len("/mcp")] if resource_url.endswith("/mcp") else resource_url
return f"{base_url}/.well-known/oauth-protected-resource"

View File

@@ -51,7 +51,7 @@
<thead><tr><th class="table__head">ID</th><th class="table__head">Статус</th><th class="table__head">Обработано</th><th class="table__head">Ошибки</th><th class="table__head">Старт</th></tr></thead> <thead><tr><th class="table__head">ID</th><th class="table__head">Статус</th><th class="table__head">Обработано</th><th class="table__head">Ошибки</th><th class="table__head">Старт</th></tr></thead>
<tbody> <tbody>
{% for run in runs %} {% for run in runs %}
<tr class="table__row" data-row-href="/admin/runs/{{ run.id }}" role="link" tabindex="0"><td class="table__cell">{{ run.id }}</td><td class="table__cell">{{ run.status_display }}</td><td class="table__cell">{{ run.parsed_count }}</td><td class="table__cell">{{ run.error_count }}</td><td class="table__cell">{{ run.started_display }}</td></tr> <tr class="table__row" onclick="window.location.href='/admin/runs/{{ run.id }}'" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); window.location.href='/admin/runs/{{ run.id }}'; }" role="link" tabindex="0"><td class="table__cell">{{ run.id }}</td><td class="table__cell">{{ run.status_display }}</td><td class="table__cell">{{ run.parsed_count }}</td><td class="table__cell">{{ run.error_count }}</td><td class="table__cell">{{ run.started_display }}</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>

View File

@@ -38,7 +38,7 @@
<thead><tr><th class="table__head">ID</th><th class="table__head">Статус</th><th class="table__head">Найдено</th><th class="table__head">Обработано</th><th class="table__head">Новые</th><th class="table__head">Ошибки</th><th class="table__head">Уволены</th><th class="table__head">Старт</th></tr></thead> <thead><tr><th class="table__head">ID</th><th class="table__head">Статус</th><th class="table__head">Найдено</th><th class="table__head">Обработано</th><th class="table__head">Новые</th><th class="table__head">Ошибки</th><th class="table__head">Уволены</th><th class="table__head">Старт</th></tr></thead>
<tbody> <tbody>
{% for run in runs %} {% for run in runs %}
<tr class="table__row" data-row-href="/admin/runs/{{ run.id }}" role="link" tabindex="0"><td class="table__cell">{{ run.id }}</td><td class="table__cell">{{ run.status_display }}</td><td class="table__cell">{{ run.found_count }}</td><td class="table__cell">{{ run.parsed_count }}</td><td class="table__cell">{{ run.new_count }}</td><td class="table__cell">{{ run.error_count }}</td><td class="table__cell">{{ run.dismissed_count }}</td><td class="table__cell">{{ run.started_display }}</td></tr> <tr class="table__row" onclick="window.location.href='/admin/runs/{{ run.id }}'" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); window.location.href='/admin/runs/{{ run.id }}'; }" role="link" tabindex="0"><td class="table__cell">{{ run.id }}</td><td class="table__cell">{{ run.status_display }}</td><td class="table__cell">{{ run.found_count }}</td><td class="table__cell">{{ run.parsed_count }}</td><td class="table__cell">{{ run.new_count }}</td><td class="table__cell">{{ run.error_count }}</td><td class="table__cell">{{ run.dismissed_count }}</td><td class="table__cell">{{ run.started_display }}</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>

View File

@@ -1,3 +1,3 @@
APP_VERSION = "0.4.3" APP_VERSION = "0.4.5"
FRONTEND_VERSION = "0.4.3" FRONTEND_VERSION = "0.4.5"
BACKEND_VERSION = "0.4.3" BACKEND_VERSION = "0.4.5"

View File

@@ -20,7 +20,7 @@ services:
environment: environment:
DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers} DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers}
ports: ports:
- "127.0.0.1:8000:8000" - "127.0.0.1:${API_PORT:-8000}:8000"
depends_on: depends_on:
postgres: postgres:
condition: service_healthy condition: service_healthy
@@ -42,33 +42,7 @@ services:
environment: environment:
DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers} DATABASE_URL: postgresql+psycopg://${POSTGRES_USER:-miem}:${POSTGRES_PASSWORD:-miem_password}@postgres:5432/${POSTGRES_DB:-miem_workers}
ports: ports:
- "127.0.0.1:8001:8000" - "127.0.0.1:${MCP_PORT:-8001}:8000"
depends_on:
postgres:
condition: service_healthy
keycloak:
image: quay.io/keycloak/keycloak:latest
container_name: keycloak
restart: unless-stopped
environment:
KC_DB: postgres
KC_DB_URL: jdbc:postgresql://postgres:5432/${KEYCLOAK_DB_NAME}
KC_DB_USERNAME: ${KEYCLOAK_DB_USER}
KC_DB_PASSWORD: ${KEYCLOAK_DB_PASSWORD}
KEYCLOAK_ADMIN: ${KEYCLOAK_ADMIN}
KEYCLOAK_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD}
KC_HTTP_ENABLED: true
KC_PROXY_HEADERS: xforwarded
KC_HOSTNAME: ${KEYCLOAK_HOSTNAME}
KC_HEALTH_ENABLED: true
KC_METRICS_ENABLED: true
command: start
ports:
- "127.0.0.1:8080:8080"
depends_on: depends_on:
postgres: postgres:
condition: service_healthy condition: service_healthy

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "miem-workers" name = "miem-workers"
version = "0.4.0" version = "0.4.5"
description = "MIEM employees parser, admin API, and MCP server" description = "MIEM employees parser, admin API, and MCP server"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
@@ -12,7 +12,6 @@ dependencies = [
"lxml>=5.2.0", "lxml>=5.2.0",
"psycopg[binary]>=3.2.0", "psycopg[binary]>=3.2.0",
"pydantic-settings>=2.4.0", "pydantic-settings>=2.4.0",
"PyJWT[crypto]>=2.9.0",
"python-multipart>=0.0.9", "python-multipart>=0.0.9",
"requests>=2.32.0", "requests>=2.32.0",
"sqlalchemy>=2.0.32", "sqlalchemy>=2.0.32",

View File

@@ -6,7 +6,6 @@ jinja2>=3.1.4
lxml>=5.2.0 lxml>=5.2.0
psycopg[binary]>=3.2.0 psycopg[binary]>=3.2.0
pydantic-settings>=2.4.0 pydantic-settings>=2.4.0
PyJWT[crypto]>=2.9.0
python-multipart>=0.0.9 python-multipart>=0.0.9
requests>=2.32.0 requests>=2.32.0
sqlalchemy>=2.0.32 sqlalchemy>=2.0.32

View File

@@ -45,9 +45,11 @@ def test_dashboard_limits_latest_runs_to_five():
def test_runs_template_links_to_run_detail(): def test_runs_template_links_to_run_detail():
template = Path("app/templates/runs.html").read_text(encoding="utf-8") template = Path("app/templates/runs.html").read_text(encoding="utf-8")
assert 'data-row-href="/admin/runs/{{ run.id }}"' in template assert 'onclick="window.location.href=\'/admin/runs/{{ run.id }}\'"' in template
assert "onkeydown=\"if (event.key === 'Enter' || event.key === ' ')" in template
assert 'role="link"' in template assert 'role="link"' in template
assert 'tabindex="0"' in template assert 'tabindex="0"' in template
assert 'data-row-href="/admin/runs/{{ run.id }}"' not in template
assert '<a class="admin__link" href="/admin/runs/{{ run.id }}">' not in template assert '<a class="admin__link" href="/admin/runs/{{ run.id }}">' not in template
@@ -75,9 +77,11 @@ def test_dashboard_metric_cards_link_to_admin_targets():
def test_dashboard_latest_run_rows_link_to_run_detail(): def test_dashboard_latest_run_rows_link_to_run_detail():
template = Path("app/templates/dashboard.html").read_text(encoding="utf-8") template = Path("app/templates/dashboard.html").read_text(encoding="utf-8")
assert 'data-row-href="/admin/runs/{{ run.id }}"' in template assert 'onclick="window.location.href=\'/admin/runs/{{ run.id }}\'"' in template
assert "onkeydown=\"if (event.key === 'Enter' || event.key === ' ')" in template
assert 'role="link"' in template assert 'role="link"' in template
assert 'tabindex="0"' in template assert 'tabindex="0"' in template
assert 'data-row-href="/admin/runs/{{ run.id }}"' not in template
assert '<a class="admin__link" href="/admin/runs/{{ run.id }}">' not in template assert '<a class="admin__link" href="/admin/runs/{{ run.id }}">' not in template

View File

@@ -1,15 +1,10 @@
import time
from datetime import datetime, timezone from datetime import datetime, timezone
from types import SimpleNamespace
import jwt
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from cryptography.hazmat.primitives.asymmetric import rsa
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool from sqlalchemy.pool import StaticPool
import app.security as security
from app.config import Settings, get_settings from app.config import Settings, get_settings
from app.db import Base, get_db from app.db import Base, get_db
from app.main import app from app.main import app
@@ -23,10 +18,10 @@ def test_health_returns_versions():
response = client.get("/api/health") response = client.get("/api/health")
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["backend_version"] == "0.4.3" assert response.json()["backend_version"] == "0.4.5"
def test_mcp_requires_token_and_lists_tools(): def test_mcp_lists_tools_without_auth_and_ignores_auth_header():
engine = create_engine( engine = create_engine(
"sqlite:///:memory:", "sqlite:///:memory:",
connect_args={"check_same_thread": False}, connect_args={"check_same_thread": False},
@@ -43,22 +38,20 @@ def test_mcp_requires_token_and_lists_tools():
session.close() session.close()
app.dependency_overrides[get_db] = override_db app.dependency_overrides[get_db] = override_db
app.dependency_overrides[get_settings] = lambda: Settings(
mcp_auth_mode="token", mcp_token="secret", session_secret="session-secret"
)
client = TestClient(app) client = TestClient(app)
unauthorized = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}) without_auth = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}})
authorized = client.post( with_auth = client.post(
"/mcp", "/mcp",
headers={"Authorization": "Bearer secret"}, headers={"Authorization": "Bearer anything"},
json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
) )
assert unauthorized.status_code == 401 assert without_auth.status_code == 200
assert authorized.status_code == 200 assert with_auth.status_code == 200
assert authorized.json()["result"]["tools"][0]["name"] == "search_employees" assert without_auth.json()["result"]["tools"][0]["name"] == "search_employees"
assert any(tool["name"] == "get_crawl_run_details" for tool in authorized.json()["result"]["tools"]) assert any(tool["name"] == "get_crawl_run_details" for tool in without_auth.json()["result"]["tools"])
assert with_auth.json()["result"]["tools"] == without_auth.json()["result"]["tools"]
app.dependency_overrides.clear() app.dependency_overrides.clear()
@@ -96,14 +89,10 @@ def test_mcp_search_employees_returns_matching_employee():
db.close() db.close()
app.dependency_overrides[get_db] = override_db app.dependency_overrides[get_db] = override_db
app.dependency_overrides[get_settings] = lambda: Settings(
mcp_auth_mode="token", mcp_token="secret", session_secret="session-secret"
)
client = TestClient(app) client = TestClient(app)
response = client.post( response = client.post(
"/mcp", "/mcp",
headers={"Authorization": "Bearer secret"},
json={ json={
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": 1, "id": 1,
@@ -164,14 +153,10 @@ def test_mcp_get_crawl_run_details_returns_changes():
db.close() db.close()
app.dependency_overrides[get_db] = override_db app.dependency_overrides[get_db] = override_db
app.dependency_overrides[get_settings] = lambda: Settings(
mcp_auth_mode="token", mcp_token="secret", session_secret="session-secret"
)
client = TestClient(app) client = TestClient(app)
response = client.post( response = client.post(
"/mcp", "/mcp",
headers={"Authorization": "Bearer secret"},
json={ json={
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": 1, "id": 1,
@@ -188,146 +173,12 @@ def test_mcp_get_crawl_run_details_returns_changes():
app.dependency_overrides.clear() app.dependency_overrides.clear()
def test_mcp_oauth_rejects_static_token(): def test_mcp_protected_resource_metadata_route_is_removed():
engine = create_engine(
"sqlite:///:memory:",
connect_args={"check_same_thread": False},
poolclass=StaticPool,
)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
def override_db():
session = Session()
try:
yield session
finally:
session.close()
settings = Settings(
mcp_auth_mode="oauth",
mcp_token="secret",
session_secret="session-secret",
mcp_oauth_issuer="https://auth.example.com",
mcp_oauth_audience="miem-mcp",
mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json",
)
app.dependency_overrides[get_db] = override_db
app.dependency_overrides[get_settings] = lambda: settings
client = TestClient(app)
response = client.post(
"/mcp",
headers={"Authorization": "Bearer secret"},
json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
)
assert response.status_code == 401
assert response.headers["www-authenticate"] == (
'Bearer resource_metadata="http://localhost:8001/.well-known/oauth-protected-resource"'
)
app.dependency_overrides.clear()
def test_mcp_oauth_missing_auth_returns_metadata_challenge():
settings = Settings(
mcp_auth_mode="oauth",
mcp_resource_url="https://api.example.com/mcp",
mcp_oauth_issuer="https://auth.example.com",
mcp_oauth_audience="miem-mcp",
mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json",
)
app.dependency_overrides[get_settings] = lambda: settings
client = TestClient(app)
response = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}})
assert response.status_code == 401
assert response.headers["www-authenticate"] == (
'Bearer resource_metadata="https://api.example.com/.well-known/oauth-protected-resource"'
)
app.dependency_overrides.clear()
def test_mcp_accepts_valid_oauth_jwt(monkeypatch):
public_key, token = _oauth_key_and_token()
monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
app.dependency_overrides[get_settings] = lambda: _oauth_settings()
client = TestClient(app)
response = client.post(
"/mcp",
headers={"Authorization": f"Bearer {token}"},
json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
)
assert response.status_code == 200
assert response.json()["result"]["tools"][0]["name"] == "search_employees"
app.dependency_overrides.clear()
def test_mcp_rejects_invalid_oauth_jwts(monkeypatch):
public_key, expired_token = _oauth_key_and_token(exp=int(time.time()) - 60)
_, wrong_issuer_token = _oauth_key_and_token(issuer="https://other.example.com")
_, wrong_audience_token = _oauth_key_and_token(audience="other-audience")
_, bad_signature_token = _oauth_key_and_token(public_key=public_key)
monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
app.dependency_overrides[get_settings] = lambda: _oauth_settings()
client = TestClient(app)
for token in [expired_token, wrong_issuer_token, wrong_audience_token, bad_signature_token]:
response = client.post(
"/mcp",
headers={"Authorization": f"Bearer {token}"},
json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
)
assert response.status_code == 401
app.dependency_overrides.clear()
def test_mcp_rejects_oauth_jwt_without_required_scope(monkeypatch):
public_key, token = _oauth_key_and_token(scope="profile")
monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
app.dependency_overrides[get_settings] = lambda: _oauth_settings()
client = TestClient(app)
response = client.post(
"/mcp",
headers={"Authorization": f"Bearer {token}"},
json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
)
assert response.status_code == 403
app.dependency_overrides.clear()
def test_mcp_protected_resource_metadata_uses_settings():
settings = Settings(
mcp_resource_url="https://api.example.com/mcp",
mcp_oauth_issuer="https://auth.example.com/",
mcp_oauth_required_scope="mcp:tools",
)
app.dependency_overrides[get_settings] = lambda: settings
client = TestClient(app) client = TestClient(app)
response = client.get("/.well-known/oauth-protected-resource") response = client.get("/.well-known/oauth-protected-resource")
assert response.status_code == 200 assert response.status_code == 404
assert response.json() == {
"resource": "https://api.example.com/mcp",
"authorization_servers": ["https://auth.example.com"],
"bearer_methods_supported": ["header"],
"scopes_supported": ["mcp:tools"],
"resource_documentation": "https://api.example.com/mcp",
}
app.dependency_overrides.clear()
def test_api_employees_and_stats_require_admin_session(): def test_api_employees_and_stats_require_admin_session():
@@ -397,35 +248,3 @@ def test_api_employees_and_stats_require_admin_session():
assert run_details.json()["changes"]["new"][0]["full_name"] == "Alpha Person" assert run_details.json()["changes"]["new"][0]["full_name"] == "Alpha Person"
app.dependency_overrides.clear() app.dependency_overrides.clear()
def _oauth_settings() -> Settings:
return Settings(
mcp_auth_mode="oauth",
mcp_resource_url="https://api.example.com/mcp",
mcp_oauth_issuer="https://auth.example.com",
mcp_oauth_audience="miem-mcp",
mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json",
session_secret="session-secret",
)
def _oauth_key_and_token(
*,
issuer: str = "https://auth.example.com",
audience: str = "miem-mcp",
scope: str = "mcp:tools",
exp: int | None = None,
public_key=None,
):
private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
claims = {
"iss": issuer,
"aud": audience,
"scope": scope,
"sub": "mcp-client",
"iat": int(time.time()),
"exp": exp or int(time.time()) + 300,
}
token = jwt.encode(claims, private_key, algorithm="RS256", headers={"kid": "test-key"})
return public_key or private_key.public_key(), token

View File

@@ -1,6 +1,3 @@
import pytest
from pydantic import ValidationError
from app.config import Settings from app.config import Settings
@@ -14,8 +11,3 @@ def test_numeric_crawl_limit_is_parsed():
settings = Settings(crawl_limit="25") settings = Settings(crawl_limit="25")
assert settings.crawl_limit == 25 assert settings.crawl_limit == 25
def test_mcp_auth_mode_rejects_oauth_or_token_fallback():
with pytest.raises(ValidationError):
Settings(mcp_auth_mode="oauth_or_token")