72 lines
2.5 KiB
Python
72 lines
2.5 KiB
Python
from sqlalchemy import create_engine, inspect, text
|
|
|
|
from app.db import _ensure_runtime_schema
|
|
|
|
|
|
def test_runtime_schema_adds_skipped_count_to_existing_crawl_runs_table(monkeypatch):
    """Check that ``_ensure_runtime_schema`` adds ``skipped_count`` to ``crawl_runs``.

    An in-memory SQLite database is seeded with a ``crawl_runs`` table that
    has no ``skipped_count`` column, ``app.db.engine`` is pointed at that
    database, and the column is expected to exist after the schema helper runs.
    """
    memory_engine = create_engine("sqlite:///:memory:")

    # Pre-existing table layout that deliberately omits ``skipped_count``.
    legacy_crawl_runs_ddl = text(
        """
        CREATE TABLE crawl_runs (
            id INTEGER PRIMARY KEY,
            source_url TEXT NOT NULL,
            status VARCHAR(32) NOT NULL DEFAULT 'running',
            found_count INTEGER NOT NULL DEFAULT 0,
            parsed_count INTEGER NOT NULL DEFAULT 0
        )
        """
    )
    with memory_engine.begin() as conn:
        conn.execute(legacy_crawl_runs_ddl)

    # Make the helper under test operate on the throwaway database.
    monkeypatch.setattr("app.db.engine", memory_engine)

    _ensure_runtime_schema()

    db_inspector = inspect(memory_engine)
    column_names = {entry["name"] for entry in db_inspector.get_columns("crawl_runs")}
    assert "skipped_count" in column_names
|
|
|
|
|
|
def test_runtime_schema_creates_employee_publications_table_when_employees_exist(monkeypatch):
    """Check that ``_ensure_runtime_schema`` creates ``employee_publications``.

    An in-memory SQLite database is seeded with ``employees`` and
    ``crawl_runs`` tables only, and ``app.db.engine`` is pointed at it. The
    schema helper is invoked twice, so the second invocation must also
    complete without raising. Afterwards the ``employee_publications`` table
    must exist and expose the expected columns.
    """
    memory_engine = create_engine("sqlite:///:memory:")

    employees_ddl = """
        CREATE TABLE employees (
            id INTEGER PRIMARY KEY,
            profile_key VARCHAR(255) NOT NULL UNIQUE,
            canonical_url TEXT NOT NULL,
            status VARCHAR(32) NOT NULL DEFAULT 'active',
            first_seen_at DATETIME NOT NULL,
            last_seen_at DATETIME NOT NULL,
            created_at DATETIME NOT NULL,
            updated_at DATETIME NOT NULL
        )
        """
    crawl_runs_ddl = """
        CREATE TABLE crawl_runs (
            id INTEGER PRIMARY KEY,
            source_url TEXT NOT NULL,
            status VARCHAR(32) NOT NULL DEFAULT 'running',
            found_count INTEGER NOT NULL DEFAULT 0,
            parsed_count INTEGER NOT NULL DEFAULT 0,
            skipped_count INTEGER NOT NULL DEFAULT 0
        )
        """

    # Seed both pre-existing tables inside a single transaction.
    with memory_engine.begin() as conn:
        for ddl in (employees_ddl, crawl_runs_ddl):
            conn.execute(text(ddl))

    # Make the helper under test operate on the throwaway database.
    monkeypatch.setattr("app.db.engine", memory_engine)

    # Run the helper twice back to back; neither call may raise.
    for _ in range(2):
        _ensure_runtime_schema()

    db_inspector = inspect(memory_engine)
    assert "employee_publications" in db_inspector.get_table_names()

    present_columns = {
        entry["name"] for entry in db_inspector.get_columns("employee_publications")
    }
    required_columns = {
        "employee_id",
        "publication_id",
        "doi_url",
        "authors",
        "raw_data",
        "source_hash",
    }
    assert required_columns.issubset(present_columns)
|