diff --git a/.env.example b/.env.example index 66a29d7..500f59a 100644 --- a/.env.example +++ b/.env.example @@ -15,6 +15,12 @@ ADMIN_USERNAME=admin ADMIN_PASSWORD=change-me SESSION_SECRET=change-me-session-secret MCP_TOKEN=change-me-mcp-token +MCP_AUTH_MODE=token +MCP_RESOURCE_URL=http://localhost:8001/mcp +MCP_OAUTH_ISSUER= +MCP_OAUTH_AUDIENCE= +MCP_OAUTH_JWKS_URL= +MCP_OAUTH_REQUIRED_SCOPE=mcp:tools API_PORT=8000 MCP_PORT=8001 diff --git a/README.md b/README.md index c672721..b24e289 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ - `api`: FastAPI, REST API, HTML-админка, healthcheck. - `worker`: weekly scheduler, который запускает парсинг по `CRAWL_CRON`. -- `mcp`: HTTP MCP endpoint с bearer token. +- `mcp`: HTTP MCP endpoint со статическим bearer token или OAuth/OIDC access token. - `postgres`: основная БД. Парсер использует фиксированный источник сотрудников, по умолчанию `https://miem.hse.ru/persons`. Для каждой карточки сохраняются ФИО, должности, год начала работы, контакты, идентификаторы, вкладки профиля, секции, публикации, курсы, ВКР, JSON-снапшот и сжатый HTML-снапшот. Ссылки обходятся только из меню профиля самого сотрудника (`person-menu`), например `#sci`, `#teaching`, `#main`. @@ -27,7 +27,13 @@ cp .env.example .env - `CRAWL_LIMIT`: опциональный лимит профилей для тестового запуска. - `ADMIN_USERNAME`, `ADMIN_PASSWORD`: логин и пароль админки. - `SESSION_SECRET`: секрет подписи cookie. -- `MCP_TOKEN`: bearer token для `/mcp`. +- `MCP_TOKEN`: статический bearer token для `/mcp`. +- `MCP_AUTH_MODE`: режим авторизации MCP: `token`, `oauth` или `oauth_or_token`. +- `MCP_RESOURCE_URL`: публичный URL MCP endpoint, например `https://example.com/mcp`. +- `MCP_OAUTH_ISSUER`: issuer внешнего OIDC-провайдера. +- `MCP_OAUTH_AUDIENCE`: ожидаемый `aud` в OAuth access token. +- `MCP_OAUTH_JWKS_URL`: JWKS endpoint; если не задан, используется `/.well-known/jwks.json`. +- `MCP_OAUTH_REQUIRED_SCOPE`: scope для доступа к MCP tools, по умолчанию `mcp:tools`. - `PARSER_USE_PLAYWRIGHT`: включение Playwright-рендера динамических вкладок. ## Локальный запуск @@ -82,7 +88,9 @@ curl -X POST http://localhost:8000/api/crawl-runs --cookie "miem_admin_session=. ## MCP -Endpoint: `POST /mcp`, авторизация `Authorization: Bearer `. +Endpoint: `POST /mcp`, авторизация `Authorization: Bearer `. + +По умолчанию используется статический токен из `MCP_TOKEN`: Поддерживаемые tools: @@ -101,6 +109,19 @@ curl http://localhost:8001/mcp \ -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' ``` +Для OAuth/OIDC настройте внешний authorization server и включите режим `oauth` или `oauth_or_token`: + +```env +MCP_AUTH_MODE=oauth_or_token +MCP_RESOURCE_URL=https://example.com/mcp +MCP_OAUTH_ISSUER=https://auth.example.com +MCP_OAUTH_AUDIENCE=miem-mcp +MCP_OAUTH_JWKS_URL=https://auth.example.com/.well-known/jwks.json +MCP_OAUTH_REQUIRED_SCOPE=mcp:tools +``` + +MCP server работает как OAuth protected resource: он не выдает токены, а проверяет JWT access token по JWKS, `issuer`, `audience`, сроку действия и scope. Metadata для MCP-клиентов доступна по `GET /.well-known/oauth-protected-resource`. + ## Обслуживание ```bash @@ -110,4 +131,4 @@ docker compose exec postgres pg_dump -U miem miem_workers > backup.sql docker compose down ``` -Версия сервиса: `0.2.8`. Админка всегда показывает версии backend и frontend в footer. +Версия сервиса: `0.3.0`. Админка всегда показывает версии backend и frontend в footer. diff --git a/app/config.py b/app/config.py index 969bc15..6be9d8e 100644 --- a/app/config.py +++ b/app/config.py @@ -1,4 +1,6 @@ from functools import lru_cache +from typing import Literal + from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -18,6 +20,12 @@ class Settings(BaseSettings): admin_password: str = "admin" session_secret: str = Field(default="dev-session-secret", min_length=8) mcp_token: str = "dev-mcp-token" + mcp_auth_mode: Literal["token", "oauth", "oauth_or_token"] = "token" + mcp_resource_url: str = "http://localhost:8001/mcp" + mcp_oauth_issuer: str = "" + mcp_oauth_audience: str = "" + mcp_oauth_jwks_url: str = "" + mcp_oauth_required_scope: str = "mcp:tools" @field_validator("crawl_limit", mode="before") @classmethod @@ -26,6 +34,14 @@ class Settings(BaseSettings): return None return value + def oauth_jwks_url(self) -> str: + if self.mcp_oauth_jwks_url: + return self.mcp_oauth_jwks_url + issuer = self.mcp_oauth_issuer.rstrip("/") + if not issuer: + return "" + return f"{issuer}/.well-known/jwks.json" + @lru_cache def get_settings() -> Settings: diff --git a/app/main.py b/app/main.py index 7f34d48..915d82b 100644 --- a/app/main.py +++ b/app/main.py @@ -4,6 +4,7 @@ from fastapi.staticfiles import StaticFiles from app.admin import router as admin_router from app.api import router as api_router from app.db import init_db +from app.mcp import metadata_router as mcp_metadata_router from app.mcp import router as mcp_router from app.version import BACKEND_VERSION @@ -12,6 +13,7 @@ app.mount("/static", StaticFiles(directory="app/static"), name="static") app.include_router(api_router) app.include_router(admin_router) app.include_router(mcp_router) +app.include_router(mcp_metadata_router) @app.on_event("startup") diff --git a/app/mcp.py b/app/mcp.py index 22ff32a..ed5a311 100644 --- a/app/mcp.py +++ b/app/mcp.py @@ -7,9 +7,10 @@ from sqlalchemy.orm import Session from app.config import Settings, get_settings from app.db import get_db from app.models import CrawlRun, Employee -from app.security import require_mcp_token +from app.security import mcp_protected_resource_metadata, require_mcp_auth router = APIRouter(prefix="/mcp") +metadata_router = APIRouter() TOOLS = [ @@ -55,7 +56,7 @@ async def mcp_http( db: Session = Depends(get_db), settings: Settings = Depends(get_settings), ) -> dict: - require_mcp_token(request, settings) + require_mcp_auth(request, settings) payload = await request.json() method = payload.get("method") request_id = payload.get("id") @@ -168,3 +169,8 @@ def _run_payload(run: CrawlRun) -> dict: def _tool_response(data: object) -> dict: return {"content": [{"type": "text", "text": json.dumps(data, ensure_ascii=False, default=str)}]} + + +@metadata_router.get("/.well-known/oauth-protected-resource") +def oauth_protected_resource(settings: Settings = Depends(get_settings)) -> dict: + return mcp_protected_resource_metadata(settings) diff --git a/app/security.py b/app/security.py index 474f2f8..09032b9 100644 --- a/app/security.py +++ b/app/security.py @@ -3,7 +3,10 @@ import hashlib import hmac import json import time +from functools import lru_cache +import jwt +from jwt import PyJWKClient, PyJWTError from fastapi import HTTPException, Request, status from app.config import Settings @@ -46,7 +49,91 @@ def require_admin(request: Request, settings: Settings) -> str: return username -def require_mcp_token(request: Request, settings: Settings) -> None: +def require_mcp_auth(request: Request, settings: Settings) -> None: auth = request.headers.get("authorization", "") - if not auth.startswith("Bearer ") or not hmac.compare_digest(auth.removeprefix("Bearer ").strip(), settings.mcp_token): - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid MCP token") + if not auth.startswith("Bearer "): + raise _mcp_unauthorized(settings, "Missing bearer token") + + token = auth.removeprefix("Bearer ").strip() + if _mcp_static_token_allowed(settings) and hmac.compare_digest(token, settings.mcp_token): + return + if _mcp_oauth_allowed(settings): + _validate_mcp_oauth_token(token, settings) + return + raise _mcp_unauthorized(settings, "Invalid MCP token") + + +def require_mcp_token(request: Request, settings: Settings) -> None: + require_mcp_auth(request, settings) + + +def mcp_protected_resource_metadata(settings: Settings) -> dict: + authorization_servers = [settings.mcp_oauth_issuer.rstrip("/")] if settings.mcp_oauth_issuer else [] + return { + "resource": settings.mcp_resource_url, + "authorization_servers": authorization_servers, + "bearer_methods_supported": ["header"], + "scopes_supported": [settings.mcp_oauth_required_scope], + "resource_documentation": settings.mcp_resource_url, + } + + +def _mcp_static_token_allowed(settings: Settings) -> bool: + return settings.mcp_auth_mode in {"token", "oauth_or_token"} + + +def _mcp_oauth_allowed(settings: Settings) -> bool: + return settings.mcp_auth_mode in {"oauth", "oauth_or_token"} + + +def _validate_mcp_oauth_token(token: str, settings: Settings) -> None: + if not settings.mcp_oauth_issuer or not settings.mcp_oauth_audience or not settings.oauth_jwks_url(): + raise _mcp_unauthorized(settings, "MCP OAuth is not configured") + try: + signing_key = _get_mcp_oauth_signing_key(token, settings).key + claims = jwt.decode( + token, + signing_key, + algorithms=["RS256", "RS384", "RS512", "ES256", "ES384", "ES512"], + audience=settings.mcp_oauth_audience, + issuer=settings.mcp_oauth_issuer.rstrip("/"), + ) + except PyJWTError as exc: + raise _mcp_unauthorized(settings, "Invalid OAuth access token") from exc + if not _claims_have_scope(claims, settings.mcp_oauth_required_scope): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Missing required MCP OAuth scope") + + +def _claims_have_scope(claims: dict, required_scope: str) -> bool: + scopes: set[str] = set() + scope = claims.get("scope") + if isinstance(scope, str): + scopes.update(scope.split()) + scp = claims.get("scp") + if isinstance(scp, str): + scopes.update(scp.split()) + elif isinstance(scp, list): + scopes.update(str(item) for item in scp) + return required_scope in scopes + + +@lru_cache(maxsize=16) +def _get_jwk_client(jwks_url: str) -> PyJWKClient: + return PyJWKClient(jwks_url) + + +def _get_mcp_oauth_signing_key(token: str, settings: Settings): + return _get_jwk_client(settings.oauth_jwks_url()).get_signing_key_from_jwt(token) + + +def _mcp_unauthorized(settings: Settings, detail: str) -> HTTPException: + headers = {} + if _mcp_oauth_allowed(settings): + headers["WWW-Authenticate"] = f'Bearer resource_metadata="{_mcp_metadata_url(settings)}"' + return HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=detail, headers=headers) + + +def _mcp_metadata_url(settings: Settings) -> str: + resource_url = settings.mcp_resource_url.rstrip("/") + base_url = resource_url[: -len("/mcp")] if resource_url.endswith("/mcp") else resource_url + return f"{base_url}/.well-known/oauth-protected-resource" diff --git a/app/version.py b/app/version.py index 4cbd624..a9d2612 100644 --- a/app/version.py +++ b/app/version.py @@ -1,3 +1,3 @@ -APP_VERSION = "0.2.8" -FRONTEND_VERSION = "0.2.8" -BACKEND_VERSION = "0.2.8" +APP_VERSION = "0.3.0" +FRONTEND_VERSION = "0.3.0" +BACKEND_VERSION = "0.3.0" diff --git a/pyproject.toml b/pyproject.toml index d4271a8..6c46484 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "miem-workers" -version = "0.2.8" +version = "0.3.0" description = "MIEM employees parser, admin API, and MCP server" requires-python = ">=3.11" dependencies = [ @@ -12,6 +12,7 @@ dependencies = [ "lxml>=5.2.0", "psycopg[binary]>=3.2.0", "pydantic-settings>=2.4.0", + "PyJWT[crypto]>=2.9.0", "python-multipart>=0.0.9", "requests>=2.32.0", "sqlalchemy>=2.0.32", diff --git a/requirements.txt b/requirements.txt index e9226e7..072eef4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ jinja2>=3.1.4 lxml>=5.2.0 psycopg[binary]>=3.2.0 pydantic-settings>=2.4.0 +PyJWT[crypto]>=2.9.0 python-multipart>=0.0.9 requests>=2.32.0 sqlalchemy>=2.0.32 diff --git a/tests/test_api_mcp.py b/tests/test_api_mcp.py index 41a69ad..883dee0 100644 --- a/tests/test_api_mcp.py +++ b/tests/test_api_mcp.py @@ -1,10 +1,15 @@ +import time from datetime import datetime, timezone +from types import SimpleNamespace +import jwt from fastapi.testclient import TestClient +from cryptography.hazmat.primitives.asymmetric import rsa from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import StaticPool +import app.security as security from app.config import Settings, get_settings from app.db import Base, get_db from app.main import app @@ -18,7 +23,7 @@ def test_health_returns_versions(): response = client.get("/api/health") assert response.status_code == 200 - assert response.json()["backend_version"] == "0.2.8" + assert response.json()["backend_version"] == "0.3.0" def test_mcp_requires_token_and_lists_tools(): @@ -108,6 +113,146 @@ def test_mcp_search_employees_returns_matching_employee(): app.dependency_overrides.clear() +def test_mcp_oauth_or_token_keeps_static_token_fallback(): + engine = create_engine( + "sqlite:///:memory:", + connect_args={"check_same_thread": False}, + poolclass=StaticPool, + ) + Base.metadata.create_all(engine) + Session = sessionmaker(bind=engine) + + def override_db(): + session = Session() + try: + yield session + finally: + session.close() + + settings = Settings( + mcp_auth_mode="oauth_or_token", + mcp_token="secret", + session_secret="session-secret", + mcp_oauth_issuer="https://auth.example.com", + mcp_oauth_audience="miem-mcp", + mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json", + ) + app.dependency_overrides[get_db] = override_db + app.dependency_overrides[get_settings] = lambda: settings + client = TestClient(app) + + response = client.post( + "/mcp", + headers={"Authorization": "Bearer secret"}, + json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, + ) + + assert response.status_code == 200 + assert response.json()["result"]["tools"][0]["name"] == "search_employees" + + app.dependency_overrides.clear() + + +def test_mcp_oauth_missing_auth_returns_metadata_challenge(): + settings = Settings( + mcp_auth_mode="oauth", + mcp_resource_url="https://api.example.com/mcp", + mcp_oauth_issuer="https://auth.example.com", + mcp_oauth_audience="miem-mcp", + mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json", + ) + app.dependency_overrides[get_settings] = lambda: settings + client = TestClient(app) + + response = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}) + + assert response.status_code == 401 + assert response.headers["www-authenticate"] == ( + 'Bearer resource_metadata="https://api.example.com/.well-known/oauth-protected-resource"' + ) + + app.dependency_overrides.clear() + + +def test_mcp_accepts_valid_oauth_jwt(monkeypatch): + public_key, token = _oauth_key_and_token() + monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key)) + app.dependency_overrides[get_settings] = lambda: _oauth_settings() + client = TestClient(app) + + response = client.post( + "/mcp", + headers={"Authorization": f"Bearer {token}"}, + json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, + ) + + assert response.status_code == 200 + assert response.json()["result"]["tools"][0]["name"] == "search_employees" + + app.dependency_overrides.clear() + + +def test_mcp_rejects_invalid_oauth_jwts(monkeypatch): + public_key, expired_token = _oauth_key_and_token(exp=int(time.time()) - 60) + _, wrong_issuer_token = _oauth_key_and_token(issuer="https://other.example.com") + _, wrong_audience_token = _oauth_key_and_token(audience="other-audience") + _, bad_signature_token = _oauth_key_and_token(public_key=public_key) + monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key)) + app.dependency_overrides[get_settings] = lambda: _oauth_settings() + client = TestClient(app) + + for token in [expired_token, wrong_issuer_token, wrong_audience_token, bad_signature_token]: + response = client.post( + "/mcp", + headers={"Authorization": f"Bearer {token}"}, + json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, + ) + + assert response.status_code == 401 + + app.dependency_overrides.clear() + + +def test_mcp_rejects_oauth_jwt_without_required_scope(monkeypatch): + public_key, token = _oauth_key_and_token(scope="profile") + monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key)) + app.dependency_overrides[get_settings] = lambda: _oauth_settings() + client = TestClient(app) + + response = client.post( + "/mcp", + headers={"Authorization": f"Bearer {token}"}, + json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}, + ) + + assert response.status_code == 403 + + app.dependency_overrides.clear() + + +def test_mcp_protected_resource_metadata_uses_settings(): + settings = Settings( + mcp_resource_url="https://api.example.com/mcp", + mcp_oauth_issuer="https://auth.example.com/", + mcp_oauth_required_scope="mcp:tools", + ) + app.dependency_overrides[get_settings] = lambda: settings + client = TestClient(app) + + response = client.get("/.well-known/oauth-protected-resource") + + assert response.status_code == 200 + assert response.json() == { + "resource": "https://api.example.com/mcp", + "authorization_servers": ["https://auth.example.com"], + "bearer_methods_supported": ["header"], + "scopes_supported": ["mcp:tools"], + "resource_documentation": "https://api.example.com/mcp", + } + + app.dependency_overrides.clear() + + def test_api_employees_and_stats_require_admin_session(): engine = create_engine( "sqlite:///:memory:", @@ -157,3 +302,35 @@ def test_api_employees_and_stats_require_admin_session(): assert stats.json()["new_in_last_run"] == 1 app.dependency_overrides.clear() + + +def _oauth_settings() -> Settings: + return Settings( + mcp_auth_mode="oauth", + mcp_resource_url="https://api.example.com/mcp", + mcp_oauth_issuer="https://auth.example.com", + mcp_oauth_audience="miem-mcp", + mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json", + session_secret="session-secret", + ) + + +def _oauth_key_and_token( + *, + issuer: str = "https://auth.example.com", + audience: str = "miem-mcp", + scope: str = "mcp:tools", + exp: int | None = None, + public_key=None, +): + private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) + claims = { + "iss": issuer, + "aud": audience, + "scope": scope, + "sub": "mcp-client", + "iat": int(time.time()), + "exp": exp or int(time.time()) + 300, + } + token = jwt.encode(claims, private_key, algorithm="RS256", headers={"kid": "test-key"}) + return public_key or private_key.public_key(), token