feat: add dataset checkpoint sync for MCP

This commit is contained in:
Anton
2026-05-14 11:00:46 +03:00
parent a3ff9c6e9c
commit 29451ccee1
9 changed files with 558 additions and 8 deletions

View File

@@ -13,6 +13,7 @@ from app.models import CrawlError, CrawlRun, CrawlRunEmployeeChange, Employee, E
from app.parser.collector import collect_profile_links
from app.parser.profile import parse_person_profile
from app.parser.profile_url import profile_key
from app.services.dataset_versions import get_or_create_current_version
HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; MIEMEmployeesBot/0.1.0; +https://miem.hse.ru/)"
@@ -70,6 +71,7 @@ def run_crawl(db: Session, settings: Settings) -> CrawlRun:
run.dismissed_count = _mark_dismissed(db, run, found_keys, session, settings.request_timeout)
run.status = "completed"
get_or_create_current_version(db, crawl_run_id=run.id)
except Exception as exc:
run.status = "failed"
run.message = str(exc)
@@ -103,6 +105,7 @@ def refresh_employee(db: Session, employee: Employee, settings: Settings) -> Cra
_upsert_employee(db, run, parsed)
run.parsed_count = 1
run.status = "completed"
get_or_create_current_version(db, crawl_run_id=run.id)
except Exception as exc:
run.status = "failed"
run.error_count = 1