"""Tests for ``app.db._ensure_runtime_schema`` against in-memory SQLite.

Each test builds a database that already contains some tables, points
``app.db.engine`` at it, runs the schema routine, and then inspects the
resulting tables and columns.
"""

from sqlalchemy import create_engine, inspect, text
from sqlalchemy.engine import Engine

from app.db import _ensure_runtime_schema

# ``crawl_runs`` as it looks before the ``skipped_count`` column exists.
_CRAWL_RUNS_WITHOUT_SKIPPED_COUNT_DDL = """
    CREATE TABLE crawl_runs (
        id INTEGER PRIMARY KEY,
        source_url TEXT NOT NULL,
        status VARCHAR(32) NOT NULL DEFAULT 'running',
        found_count INTEGER NOT NULL DEFAULT 0,
        parsed_count INTEGER NOT NULL DEFAULT 0
    )
"""

# ``crawl_runs`` with the ``skipped_count`` column already present.
_CRAWL_RUNS_DDL = """
    CREATE TABLE crawl_runs (
        id INTEGER PRIMARY KEY,
        source_url TEXT NOT NULL,
        status VARCHAR(32) NOT NULL DEFAULT 'running',
        found_count INTEGER NOT NULL DEFAULT 0,
        parsed_count INTEGER NOT NULL DEFAULT 0,
        skipped_count INTEGER NOT NULL DEFAULT 0
    )
"""

_EMPLOYEES_DDL = """
    CREATE TABLE employees (
        id INTEGER PRIMARY KEY,
        profile_key VARCHAR(255) NOT NULL UNIQUE,
        canonical_url TEXT NOT NULL,
        status VARCHAR(32) NOT NULL DEFAULT 'active',
        first_seen_at DATETIME NOT NULL,
        last_seen_at DATETIME NOT NULL,
        created_at DATETIME NOT NULL,
        updated_at DATETIME NOT NULL
    )
"""


def _patched_engine(monkeypatch, *ddl_statements: str) -> Engine:
    """Create an in-memory SQLite engine, seed it, and install it in ``app.db``.

    The DDL statements are executed in the order given inside a single
    transaction, after which ``app.db.engine`` is replaced with the new
    engine for the duration of the test.
    """
    engine = create_engine("sqlite:///:memory:")
    with engine.begin() as connection:
        for ddl in ddl_statements:
            connection.execute(text(ddl))
    monkeypatch.setattr("app.db.engine", engine)
    return engine


def _column_names(engine: Engine, table_name: str) -> set[str]:
    """Return the set of column names reflected for ``table_name``."""
    return {column["name"] for column in inspect(engine).get_columns(table_name)}


def test_runtime_schema_adds_skipped_count_to_existing_crawl_runs_table(monkeypatch):
    """A ``crawl_runs`` table lacking ``skipped_count`` gains that column."""
    engine = _patched_engine(monkeypatch, _CRAWL_RUNS_WITHOUT_SKIPPED_COUNT_DDL)

    _ensure_runtime_schema()

    assert "skipped_count" in _column_names(engine, "crawl_runs")


def test_runtime_schema_creates_employee_publications_table_when_employees_exist(monkeypatch):
    """``employee_publications`` is created when ``employees`` already exists."""
    engine = _patched_engine(monkeypatch, _EMPLOYEES_DDL, _CRAWL_RUNS_DDL)

    # Run twice on purpose: the second call must not raise on a database
    # the first call has already processed.
    _ensure_runtime_schema()
    _ensure_runtime_schema()

    assert "employee_publications" in inspect(engine).get_table_names()
    expected_columns = {
        "employee_id",
        "publication_id",
        "doi_url",
        "authors",
        "raw_data",
        "source_hash",
    }
    assert expected_columns.issubset(_column_names(engine, "employee_publications"))


def test_runtime_schema_creates_employee_news_links_table_when_employees_exist(monkeypatch):
    """``employee_news_links`` is created when ``employees`` already exists."""
    engine = _patched_engine(monkeypatch, _EMPLOYEES_DDL, _CRAWL_RUNS_DDL)

    # Run twice on purpose: the second call must not raise on a database
    # the first call has already processed.
    _ensure_runtime_schema()
    _ensure_runtime_schema()

    assert "employee_news_links" in inspect(engine).get_table_names()
    expected_columns = {
        "employee_id",
        "title",
        "url",
        "summary",
        "published_at",
        "published_year",
        "source_hash",
        "raw_data",
    }
    assert expected_columns.issubset(_column_names(engine, "employee_news_links"))