# miem_workers/tests/test_db_schema.py

from sqlalchemy import create_engine, inspect, text

from app.db import _ensure_runtime_schema


def test_runtime_schema_adds_skipped_count_to_existing_crawl_runs_table(monkeypatch):
    """Verify that a pre-existing ``crawl_runs`` table gains ``skipped_count``.

    The test seeds an in-memory SQLite database with a ``crawl_runs`` table
    that lacks the ``skipped_count`` column, points ``app.db.engine`` at that
    database, runs ``_ensure_runtime_schema()`` once, and then checks that the
    column is present.
    """
    sqlite_engine = create_engine("sqlite:///:memory:")

    # Seed the legacy table layout (no ``skipped_count`` column yet).
    with sqlite_engine.begin() as conn:
        conn.execute(
            text(
                """
                CREATE TABLE crawl_runs (
                  id INTEGER PRIMARY KEY,
                  source_url TEXT NOT NULL,
                  status VARCHAR(32) NOT NULL DEFAULT 'running',
                  found_count INTEGER NOT NULL DEFAULT 0,
                  parsed_count INTEGER NOT NULL DEFAULT 0
                )
                """
            )
        )

    # Swap the module-level engine so the schema helper works on the test DB.
    monkeypatch.setattr("app.db.engine", sqlite_engine)

    _ensure_runtime_schema()

    crawl_runs_columns = inspect(sqlite_engine).get_columns("crawl_runs")
    column_names = {col["name"] for col in crawl_runs_columns}
    assert "skipped_count" in column_names


def test_runtime_schema_creates_employee_publications_table_when_employees_exist(monkeypatch):
    """Verify that ``employee_publications`` is created next to ``employees``.

    The test seeds an in-memory SQLite database with ``employees`` and
    ``crawl_runs`` tables, points ``app.db.engine`` at that database, and runs
    ``_ensure_runtime_schema()`` twice. It then checks that the
    ``employee_publications`` table exists and exposes the expected columns.
    """
    sqlite_engine = create_engine("sqlite:///:memory:")

    with sqlite_engine.begin() as conn:
        seed_statements = (
            text(
                """
                CREATE TABLE employees (
                  id INTEGER PRIMARY KEY,
                  profile_key VARCHAR(255) NOT NULL UNIQUE,
                  canonical_url TEXT NOT NULL,
                  status VARCHAR(32) NOT NULL DEFAULT 'active',
                  first_seen_at DATETIME NOT NULL,
                  last_seen_at DATETIME NOT NULL,
                  created_at DATETIME NOT NULL,
                  updated_at DATETIME NOT NULL
                )
                """
            ),
            text(
                """
                CREATE TABLE crawl_runs (
                  id INTEGER PRIMARY KEY,
                  source_url TEXT NOT NULL,
                  status VARCHAR(32) NOT NULL DEFAULT 'running',
                  found_count INTEGER NOT NULL DEFAULT 0,
                  parsed_count INTEGER NOT NULL DEFAULT 0,
                  skipped_count INTEGER NOT NULL DEFAULT 0
                )
                """
            ),
        )
        # Create the tables in the same order: employees first, then crawl_runs.
        for statement in seed_statements:
            conn.execute(statement)

    # Swap the module-level engine so the schema helper works on the test DB.
    monkeypatch.setattr("app.db.engine", sqlite_engine)

    # Run twice: the second call must not raise on the already-updated schema.
    _ensure_runtime_schema()
    _ensure_runtime_schema()

    schema = inspect(sqlite_engine)
    assert "employee_publications" in schema.get_table_names()

    expected_columns = {"employee_id", "publication_id", "doi_url", "authors", "raw_data", "source_hash"}
    actual_columns = {col["name"] for col in schema.get_columns("employee_publications")}
    assert expected_columns <= actual_columns