Merge pull request 'feat: adds OAuth/OIDC authentication for MCP' (#11) from feature/mcp-oauth-oidc into main

Reviewed-on: #11
This commit was merged in pull request #11.
This commit is contained in:
2026-04-29 11:35:00 +00:00
10 changed files with 331 additions and 14 deletions

View File

@@ -15,6 +15,12 @@ ADMIN_USERNAME=admin
ADMIN_PASSWORD=change-me ADMIN_PASSWORD=change-me
SESSION_SECRET=change-me-session-secret SESSION_SECRET=change-me-session-secret
MCP_TOKEN=change-me-mcp-token MCP_TOKEN=change-me-mcp-token
MCP_AUTH_MODE=token
MCP_RESOURCE_URL=http://localhost:8001/mcp
MCP_OAUTH_ISSUER=
MCP_OAUTH_AUDIENCE=
MCP_OAUTH_JWKS_URL=
MCP_OAUTH_REQUIRED_SCOPE=mcp:tools
API_PORT=8000 API_PORT=8000
MCP_PORT=8001 MCP_PORT=8001

View File

@@ -6,7 +6,7 @@
- `api`: FastAPI, REST API, HTML-админка, healthcheck. - `api`: FastAPI, REST API, HTML-админка, healthcheck.
- `worker`: weekly scheduler, который запускает парсинг по `CRAWL_CRON`. - `worker`: weekly scheduler, который запускает парсинг по `CRAWL_CRON`.
- `mcp`: HTTP MCP endpoint с bearer token. - `mcp`: HTTP MCP endpoint со статическим bearer token или OAuth/OIDC access token.
- `postgres`: основная БД. - `postgres`: основная БД.
Парсер использует фиксированный источник сотрудников, по умолчанию `https://miem.hse.ru/persons`. Для каждой карточки сохраняются ФИО, должности, год начала работы, контакты, идентификаторы, вкладки профиля, секции, публикации, курсы, ВКР, JSON-снапшот и сжатый HTML-снапшот. Ссылки обходятся только из меню профиля самого сотрудника (`person-menu`), например `#sci`, `#teaching`, `#main`. Парсер использует фиксированный источник сотрудников, по умолчанию `https://miem.hse.ru/persons`. Для каждой карточки сохраняются ФИО, должности, год начала работы, контакты, идентификаторы, вкладки профиля, секции, публикации, курсы, ВКР, JSON-снапшот и сжатый HTML-снапшот. Ссылки обходятся только из меню профиля самого сотрудника (`person-menu`), например `#sci`, `#teaching`, `#main`.
@@ -27,7 +27,13 @@ cp .env.example .env
- `CRAWL_LIMIT`: опциональный лимит профилей для тестового запуска. - `CRAWL_LIMIT`: опциональный лимит профилей для тестового запуска.
- `ADMIN_USERNAME`, `ADMIN_PASSWORD`: логин и пароль админки. - `ADMIN_USERNAME`, `ADMIN_PASSWORD`: логин и пароль админки.
- `SESSION_SECRET`: секрет подписи cookie. - `SESSION_SECRET`: секрет подписи cookie.
- `MCP_TOKEN`: bearer token для `/mcp`. - `MCP_TOKEN`: статический bearer token для `/mcp`.
- `MCP_AUTH_MODE`: режим авторизации MCP: `token`, `oauth` или `oauth_or_token`.
- `MCP_RESOURCE_URL`: публичный URL MCP endpoint, например `https://example.com/mcp`.
- `MCP_OAUTH_ISSUER`: issuer внешнего OIDC-провайдера.
- `MCP_OAUTH_AUDIENCE`: ожидаемый `aud` в OAuth access token.
- `MCP_OAUTH_JWKS_URL`: JWKS endpoint; если не задан, используется `<issuer>/.well-known/jwks.json`.
- `MCP_OAUTH_REQUIRED_SCOPE`: scope для доступа к MCP tools, по умолчанию `mcp:tools`.
- `PARSER_USE_PLAYWRIGHT`: включение Playwright-рендера динамических вкладок. - `PARSER_USE_PLAYWRIGHT`: включение Playwright-рендера динамических вкладок.
## Локальный запуск ## Локальный запуск
@@ -82,7 +88,9 @@ curl -X POST http://localhost:8000/api/crawl-runs --cookie "miem_admin_session=.
## MCP ## MCP
Endpoint: `POST /mcp`, авторизация `Authorization: Bearer <MCP_TOKEN>`. Endpoint: `POST /mcp`, авторизация `Authorization: Bearer <token>`.
По умолчанию используется статический токен из `MCP_TOKEN`:
Поддерживаемые tools: Поддерживаемые tools:
@@ -101,6 +109,19 @@ curl http://localhost:8001/mcp \
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}' -d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}'
``` ```
Для OAuth/OIDC настройте внешний authorization server и включите режим `oauth` или `oauth_or_token`:
```env
MCP_AUTH_MODE=oauth_or_token
MCP_RESOURCE_URL=https://example.com/mcp
MCP_OAUTH_ISSUER=https://auth.example.com
MCP_OAUTH_AUDIENCE=miem-mcp
MCP_OAUTH_JWKS_URL=https://auth.example.com/.well-known/jwks.json
MCP_OAUTH_REQUIRED_SCOPE=mcp:tools
```
MCP server работает как OAuth protected resource: он не выдает токены, а проверяет JWT access token по JWKS, `issuer`, `audience`, сроку действия и scope. Metadata для MCP-клиентов доступна по `GET /.well-known/oauth-protected-resource`.
## Обслуживание ## Обслуживание
```bash ```bash
@@ -110,4 +131,4 @@ docker compose exec postgres pg_dump -U miem miem_workers > backup.sql
docker compose down docker compose down
``` ```
Версия сервиса: `0.2.8`. Админка всегда показывает версии backend и frontend в footer. Версия сервиса: `0.3.0`. Админка всегда показывает версии backend и frontend в footer.

View File

@@ -1,4 +1,6 @@
from functools import lru_cache from functools import lru_cache
from typing import Literal
from pydantic import Field, field_validator from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -18,6 +20,12 @@ class Settings(BaseSettings):
admin_password: str = "admin" admin_password: str = "admin"
session_secret: str = Field(default="dev-session-secret", min_length=8) session_secret: str = Field(default="dev-session-secret", min_length=8)
mcp_token: str = "dev-mcp-token" mcp_token: str = "dev-mcp-token"
mcp_auth_mode: Literal["token", "oauth", "oauth_or_token"] = "token"
mcp_resource_url: str = "http://localhost:8001/mcp"
mcp_oauth_issuer: str = ""
mcp_oauth_audience: str = ""
mcp_oauth_jwks_url: str = ""
mcp_oauth_required_scope: str = "mcp:tools"
@field_validator("crawl_limit", mode="before") @field_validator("crawl_limit", mode="before")
@classmethod @classmethod
@@ -26,6 +34,14 @@ class Settings(BaseSettings):
return None return None
return value return value
def oauth_jwks_url(self) -> str:
    """Return the JWKS endpoint used to verify MCP OAuth tokens.

    An explicitly configured ``mcp_oauth_jwks_url`` takes precedence.
    Otherwise the URL is derived from the issuer as
    ``<issuer>/.well-known/jwks.json``. An empty string is returned when
    neither value is configured.
    """
    explicit_url = self.mcp_oauth_jwks_url
    if explicit_url:
        return explicit_url
    # Drop trailing slashes so the derived URL never contains "//".
    base = self.mcp_oauth_issuer.rstrip("/")
    return f"{base}/.well-known/jwks.json" if base else ""
@lru_cache @lru_cache
def get_settings() -> Settings: def get_settings() -> Settings:

View File

@@ -4,6 +4,7 @@ from fastapi.staticfiles import StaticFiles
from app.admin import router as admin_router from app.admin import router as admin_router
from app.api import router as api_router from app.api import router as api_router
from app.db import init_db from app.db import init_db
from app.mcp import metadata_router as mcp_metadata_router
from app.mcp import router as mcp_router from app.mcp import router as mcp_router
from app.version import BACKEND_VERSION from app.version import BACKEND_VERSION
@@ -12,6 +13,7 @@ app.mount("/static", StaticFiles(directory="app/static"), name="static")
app.include_router(api_router) app.include_router(api_router)
app.include_router(admin_router) app.include_router(admin_router)
app.include_router(mcp_router) app.include_router(mcp_router)
app.include_router(mcp_metadata_router)
@app.on_event("startup") @app.on_event("startup")

View File

@@ -7,9 +7,10 @@ from sqlalchemy.orm import Session
from app.config import Settings, get_settings from app.config import Settings, get_settings
from app.db import get_db from app.db import get_db
from app.models import CrawlRun, Employee from app.models import CrawlRun, Employee
from app.security import require_mcp_token from app.security import mcp_protected_resource_metadata, require_mcp_auth
router = APIRouter(prefix="/mcp") router = APIRouter(prefix="/mcp")
metadata_router = APIRouter()
TOOLS = [ TOOLS = [
@@ -55,7 +56,7 @@ async def mcp_http(
db: Session = Depends(get_db), db: Session = Depends(get_db),
settings: Settings = Depends(get_settings), settings: Settings = Depends(get_settings),
) -> dict: ) -> dict:
require_mcp_token(request, settings) require_mcp_auth(request, settings)
payload = await request.json() payload = await request.json()
method = payload.get("method") method = payload.get("method")
request_id = payload.get("id") request_id = payload.get("id")
@@ -168,3 +169,8 @@ def _run_payload(run: CrawlRun) -> dict:
def _tool_response(data: object) -> dict: def _tool_response(data: object) -> dict:
return {"content": [{"type": "text", "text": json.dumps(data, ensure_ascii=False, default=str)}]} return {"content": [{"type": "text", "text": json.dumps(data, ensure_ascii=False, default=str)}]}
@metadata_router.get("/.well-known/oauth-protected-resource")
def oauth_protected_resource(settings: Settings = Depends(get_settings)) -> dict:
    """Serve the OAuth protected-resource metadata document for the MCP endpoint."""
    metadata = mcp_protected_resource_metadata(settings)
    return metadata

View File

@@ -3,7 +3,10 @@ import hashlib
import hmac import hmac
import json import json
import time import time
from functools import lru_cache
import jwt
from jwt import PyJWKClient, PyJWTError
from fastapi import HTTPException, Request, status from fastapi import HTTPException, Request, status
from app.config import Settings from app.config import Settings
@@ -46,7 +49,91 @@ def require_admin(request: Request, settings: Settings) -> str:
return username return username
def require_mcp_auth(request: Request, settings: Settings) -> None:
    """Authorize an MCP request from its ``Authorization: Bearer`` header.

    The static ``mcp_token`` is tried first when the auth mode allows it.
    If it does not match and OAuth is allowed, the bearer value is validated
    as an OAuth JWT access token.

    Raises:
        HTTPException: 401 when the header is missing or malformed, or when
            the token is rejected. A 403 raised by the OAuth validation
            (missing scope) propagates unchanged.
    """
    auth = request.headers.get("authorization", "")
    if not auth.startswith("Bearer "):
        raise _mcp_unauthorized(settings, "Missing bearer token")
    token = auth.removeprefix("Bearer ").strip()
    expected = settings.mcp_token
    if (
        _mcp_static_token_allowed(settings)
        # An empty configured token must never match an empty bearer value.
        and expected
        # compare_digest() raises TypeError for str arguments that contain
        # non-ASCII characters; comparing bytes keeps a malformed header a
        # clean 401 instead of an unhandled error.
        and hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8"))
    ):
        return
    if _mcp_oauth_allowed(settings):
        _validate_mcp_oauth_token(token, settings)
        return
    raise _mcp_unauthorized(settings, "Invalid MCP token")
def require_mcp_token(request: Request, settings: Settings) -> None:
    """Delegate to :func:`require_mcp_auth`, keeping the previous function name callable."""
    return require_mcp_auth(request, settings)
def mcp_protected_resource_metadata(settings: Settings) -> dict:
    """Build the OAuth protected-resource metadata document for the MCP endpoint.

    The issuer (without trailing slashes) is listed as the only authorization
    server; the list is empty when no issuer is configured. The resource URL
    is reused as the documentation link.
    """
    issuer = settings.mcp_oauth_issuer
    resource_url = settings.mcp_resource_url
    metadata = {
        "resource": resource_url,
        "authorization_servers": [issuer.rstrip("/")] if issuer else [],
        "bearer_methods_supported": ["header"],
        "scopes_supported": [settings.mcp_oauth_required_scope],
        "resource_documentation": resource_url,
    }
    return metadata
def _mcp_static_token_allowed(settings: Settings) -> bool:
    """Tell whether the configured auth mode accepts the static MCP token."""
    mode = settings.mcp_auth_mode
    return mode in ("token", "oauth_or_token")
def _mcp_oauth_allowed(settings: Settings) -> bool:
    """Tell whether the configured auth mode accepts OAuth access tokens."""
    mode = settings.mcp_auth_mode
    return mode in ("oauth", "oauth_or_token")
def _validate_mcp_oauth_token(token: str, settings: Settings) -> None:
    """Validate an OAuth JWT access token presented to the MCP endpoint.

    The token signature is verified with the key resolved for the token,
    then issuer, audience and expiry are checked, and finally the required
    scope must be present in the claims.

    Raises:
        HTTPException: 401 when OAuth is not fully configured or the token
            fails verification; 403 when the token is valid but lacks the
            required scope.
    """
    if not settings.mcp_oauth_issuer or not settings.mcp_oauth_audience or not settings.oauth_jwks_url():
        raise _mcp_unauthorized(settings, "MCP OAuth is not configured")
    try:
        signing_key = _get_mcp_oauth_signing_key(token, settings).key
        claims = jwt.decode(
            token,
            signing_key,
            # Asymmetric algorithms only: the verification key is a public key.
            algorithms=["RS256", "RS384", "RS512", "ES256", "ES384", "ES512"],
            audience=settings.mcp_oauth_audience,
            issuer=settings.mcp_oauth_issuer.rstrip("/"),
            # PyJWT validates "exp" only when the claim is present; require it
            # so a token without an expiry cannot stay valid indefinitely.
            options={"require": ["exp"]},
        )
    except PyJWTError as exc:
        raise _mcp_unauthorized(settings, "Invalid OAuth access token") from exc
    if not _claims_have_scope(claims, settings.mcp_oauth_required_scope):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Missing required MCP OAuth scope")
def _claims_have_scope(claims: dict, required_scope: str) -> bool:
    """Return whether ``required_scope`` is granted by the token claims.

    Both the ``scope`` and ``scp`` claims are consulted, and each may be
    either a whitespace-separated string or a list of scope names. Values of
    any other type are ignored.
    """
    granted: set[str] = set()
    for claim_name in ("scope", "scp"):
        value = claims.get(claim_name)
        if isinstance(value, str):
            granted.update(value.split())
        elif isinstance(value, list):
            granted.update(str(item) for item in value)
    return required_scope in granted
@lru_cache(maxsize=16)
def _get_jwk_client(jwks_url: str) -> PyJWKClient:
    """Return a ``PyJWKClient`` for ``jwks_url``, memoized per URL by ``lru_cache``."""
    client = PyJWKClient(jwks_url)
    return client
def _get_mcp_oauth_signing_key(token: str, settings: Settings):
    """Look up the signing key for ``token`` via the configured JWKS endpoint."""
    jwk_client = _get_jwk_client(settings.oauth_jwks_url())
    return jwk_client.get_signing_key_from_jwt(token)
def _mcp_unauthorized(settings: Settings, detail: str) -> HTTPException:
    """Build (without raising) the 401 response for a rejected MCP request.

    When OAuth is allowed, a ``WWW-Authenticate`` challenge pointing at the
    protected-resource metadata URL is attached; otherwise no extra headers
    are sent.
    """
    challenge = (
        {"WWW-Authenticate": f'Bearer resource_metadata="{_mcp_metadata_url(settings)}"'}
        if _mcp_oauth_allowed(settings)
        else {}
    )
    return HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=detail, headers=challenge)
def _mcp_metadata_url(settings: Settings) -> str:
    """Return the protected-resource metadata URL derived from the resource URL.

    Trailing slashes and one trailing ``/mcp`` segment are removed from
    ``mcp_resource_url`` before the well-known path is appended.
    """
    origin = settings.mcp_resource_url.rstrip("/").removesuffix("/mcp")
    return f"{origin}/.well-known/oauth-protected-resource"

View File

@@ -1,3 +1,3 @@
APP_VERSION = "0.2.8" APP_VERSION = "0.3.0"
FRONTEND_VERSION = "0.2.8" FRONTEND_VERSION = "0.3.0"
BACKEND_VERSION = "0.2.8" BACKEND_VERSION = "0.3.0"

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "miem-workers" name = "miem-workers"
version = "0.2.8" version = "0.3.0"
description = "MIEM employees parser, admin API, and MCP server" description = "MIEM employees parser, admin API, and MCP server"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
@@ -12,6 +12,7 @@ dependencies = [
"lxml>=5.2.0", "lxml>=5.2.0",
"psycopg[binary]>=3.2.0", "psycopg[binary]>=3.2.0",
"pydantic-settings>=2.4.0", "pydantic-settings>=2.4.0",
"PyJWT[crypto]>=2.9.0",
"python-multipart>=0.0.9", "python-multipart>=0.0.9",
"requests>=2.32.0", "requests>=2.32.0",
"sqlalchemy>=2.0.32", "sqlalchemy>=2.0.32",

View File

@@ -6,6 +6,7 @@ jinja2>=3.1.4
lxml>=5.2.0 lxml>=5.2.0
psycopg[binary]>=3.2.0 psycopg[binary]>=3.2.0
pydantic-settings>=2.4.0 pydantic-settings>=2.4.0
PyJWT[crypto]>=2.9.0
python-multipart>=0.0.9 python-multipart>=0.0.9
requests>=2.32.0 requests>=2.32.0
sqlalchemy>=2.0.32 sqlalchemy>=2.0.32

View File

@@ -1,10 +1,15 @@
import time
from datetime import datetime, timezone from datetime import datetime, timezone
from types import SimpleNamespace
import jwt
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from cryptography.hazmat.primitives.asymmetric import rsa
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool from sqlalchemy.pool import StaticPool
import app.security as security
from app.config import Settings, get_settings from app.config import Settings, get_settings
from app.db import Base, get_db from app.db import Base, get_db
from app.main import app from app.main import app
@@ -18,7 +23,7 @@ def test_health_returns_versions():
response = client.get("/api/health") response = client.get("/api/health")
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["backend_version"] == "0.2.8" assert response.json()["backend_version"] == "0.3.0"
def test_mcp_requires_token_and_lists_tools(): def test_mcp_requires_token_and_lists_tools():
@@ -108,6 +113,146 @@ def test_mcp_search_employees_returns_matching_employee():
app.dependency_overrides.clear() app.dependency_overrides.clear()
def test_mcp_oauth_or_token_keeps_static_token_fallback():
    """In ``oauth_or_token`` mode the static MCP token still authorizes requests."""
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    def override_db():
        session = Session()
        try:
            yield session
        finally:
            session.close()

    settings = Settings(
        mcp_auth_mode="oauth_or_token",
        mcp_token="secret",
        session_secret="session-secret",
        mcp_oauth_issuer="https://auth.example.com",
        mcp_oauth_audience="miem-mcp",
        mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json",
    )
    app.dependency_overrides[get_db] = override_db
    app.dependency_overrides[get_settings] = lambda: settings
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        response = client.post(
            "/mcp",
            headers={"Authorization": "Bearer secret"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
        )

        assert response.status_code == 200
        assert response.json()["result"]["tools"][0]["name"] == "search_employees"
    finally:
        app.dependency_overrides.clear()
def test_mcp_oauth_missing_auth_returns_metadata_challenge():
    """A request without credentials gets 401 plus a resource-metadata challenge."""
    settings = Settings(
        mcp_auth_mode="oauth",
        mcp_resource_url="https://api.example.com/mcp",
        mcp_oauth_issuer="https://auth.example.com",
        mcp_oauth_audience="miem-mcp",
        mcp_oauth_jwks_url="https://auth.example.com/.well-known/jwks.json",
    )
    app.dependency_overrides[get_settings] = lambda: settings
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        response = client.post("/mcp", json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}})

        assert response.status_code == 401
        assert response.headers["www-authenticate"] == (
            'Bearer resource_metadata="https://api.example.com/.well-known/oauth-protected-resource"'
        )
    finally:
        app.dependency_overrides.clear()
def test_mcp_accepts_valid_oauth_jwt(monkeypatch):
    """A correctly signed JWT with matching claims and scope is accepted."""
    public_key, token = _oauth_key_and_token()
    # Bypass the JWKS lookup: hand the verifier the matching public key directly.
    monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
    app.dependency_overrides[get_settings] = lambda: _oauth_settings()
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        response = client.post(
            "/mcp",
            headers={"Authorization": f"Bearer {token}"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
        )

        assert response.status_code == 200
        assert response.json()["result"]["tools"][0]["name"] == "search_employees"
    finally:
        app.dependency_overrides.clear()
def test_mcp_rejects_invalid_oauth_jwts(monkeypatch):
    """Expired, wrong-issuer, wrong-audience and badly signed JWTs all get 401."""
    # One key pair signs every claim-level failure case. If each token were
    # signed with its own fresh key, all of them would be rejected on the
    # signature and the issuer/audience/expiry checks would never be exercised.
    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    public_key = private_key.public_key()

    def sign(**overrides) -> str:
        now = int(time.time())
        claims = {
            "iss": "https://auth.example.com",
            "aud": "miem-mcp",
            "scope": "mcp:tools",
            "sub": "mcp-client",
            "iat": now,
            "exp": now + 300,
            **overrides,
        }
        return jwt.encode(claims, private_key, algorithm="RS256", headers={"kid": "test-key"})

    rejected_tokens = {
        "expired": sign(exp=int(time.time()) - 60),
        "wrong issuer": sign(iss="https://other.example.com"),
        "wrong audience": sign(aud="other-audience"),
        # Signed with a different, freshly generated key, so only the
        # signature check can reject it.
        "bad signature": _oauth_key_and_token()[1],
    }
    monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
    app.dependency_overrides[get_settings] = lambda: _oauth_settings()
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        for reason, token in rejected_tokens.items():
            response = client.post(
                "/mcp",
                headers={"Authorization": f"Bearer {token}"},
                json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
            )
            assert response.status_code == 401, reason
    finally:
        app.dependency_overrides.clear()
def test_mcp_rejects_oauth_jwt_without_required_scope(monkeypatch):
    """A valid JWT that lacks the required scope is answered with 403."""
    public_key, token = _oauth_key_and_token(scope="profile")
    # Bypass the JWKS lookup: hand the verifier the matching public key directly.
    monkeypatch.setattr(security, "_get_mcp_oauth_signing_key", lambda _token, _settings: SimpleNamespace(key=public_key))
    app.dependency_overrides[get_settings] = lambda: _oauth_settings()
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        response = client.post(
            "/mcp",
            headers={"Authorization": f"Bearer {token}"},
            json={"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}},
        )

        assert response.status_code == 403
    finally:
        app.dependency_overrides.clear()
def test_mcp_protected_resource_metadata_uses_settings():
    """The metadata endpoint reflects the configured resource, issuer and scope."""
    settings = Settings(
        mcp_resource_url="https://api.example.com/mcp",
        # Trailing slash on purpose: the response must list the issuer without it.
        mcp_oauth_issuer="https://auth.example.com/",
        mcp_oauth_required_scope="mcp:tools",
    )
    app.dependency_overrides[get_settings] = lambda: settings
    # Clear the overrides even when an assertion fails, so they cannot leak
    # into tests that run afterwards.
    try:
        client = TestClient(app)

        response = client.get("/.well-known/oauth-protected-resource")

        assert response.status_code == 200
        assert response.json() == {
            "resource": "https://api.example.com/mcp",
            "authorization_servers": ["https://auth.example.com"],
            "bearer_methods_supported": ["header"],
            "scopes_supported": ["mcp:tools"],
            "resource_documentation": "https://api.example.com/mcp",
        }
    finally:
        app.dependency_overrides.clear()
def test_api_employees_and_stats_require_admin_session(): def test_api_employees_and_stats_require_admin_session():
engine = create_engine( engine = create_engine(
"sqlite:///:memory:", "sqlite:///:memory:",
@@ -157,3 +302,35 @@ def test_api_employees_and_stats_require_admin_session():
assert stats.json()["new_in_last_run"] == 1 assert stats.json()["new_in_last_run"] == 1
app.dependency_overrides.clear() app.dependency_overrides.clear()
def _oauth_settings() -> Settings:
    """Build settings for OAuth-only MCP auth against the example issuer."""
    oauth_config = {
        "mcp_auth_mode": "oauth",
        "mcp_resource_url": "https://api.example.com/mcp",
        "mcp_oauth_issuer": "https://auth.example.com",
        "mcp_oauth_audience": "miem-mcp",
        "mcp_oauth_jwks_url": "https://auth.example.com/.well-known/jwks.json",
        "session_secret": "session-secret",
    }
    return Settings(**oauth_config)
def _oauth_key_and_token(
    *,
    issuer: str = "https://auth.example.com",
    audience: str = "miem-mcp",
    scope: str = "mcp:tools",
    exp: int | None = None,
    public_key=None,
):
    """Create an RS256-signed test JWT and the public key to verify it with.

    Every call generates a fresh RSA key pair and signs the token with it.

    Args:
        issuer: Value of the ``iss`` claim.
        audience: Value of the ``aud`` claim.
        scope: Value of the ``scope`` claim.
        exp: Expiry as a Unix timestamp; defaults to 300 seconds from now.
        public_key: When given, returned instead of the key matching the
            signature, which yields a token that fails signature verification
            against the returned key.

    Returns:
        A ``(public_key, token)`` tuple.
    """
    private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    now = int(time.time())
    claims = {
        "iss": issuer,
        "aud": audience,
        "scope": scope,
        "sub": "mcp-client",
        "iat": now,
        # Compare against None so an explicit falsy expiry such as 0 is honoured.
        "exp": exp if exp is not None else now + 300,
    }
    token = jwt.encode(claims, private_key, algorithm="RS256", headers={"kid": "test-key"})
    return public_key or private_key.public_key(), token