96 lines
3.1 KiB
SQL
96 lines
3.1 KiB
SQL
CREATE TABLE IF NOT EXISTS parser_sources (
|
|
id SERIAL PRIMARY KEY,
|
|
source_url TEXT NOT NULL UNIQUE,
|
|
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS crawl_runs (
|
|
id SERIAL PRIMARY KEY,
|
|
source_url TEXT NOT NULL,
|
|
status VARCHAR(32) NOT NULL DEFAULT 'running',
|
|
started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
finished_at TIMESTAMPTZ,
|
|
found_count INTEGER NOT NULL DEFAULT 0,
|
|
parsed_count INTEGER NOT NULL DEFAULT 0,
|
|
skipped_count INTEGER NOT NULL DEFAULT 0,
|
|
new_count INTEGER NOT NULL DEFAULT 0,
|
|
error_count INTEGER NOT NULL DEFAULT 0,
|
|
dismissed_count INTEGER NOT NULL DEFAULT 0,
|
|
message TEXT
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS employees (
|
|
id SERIAL PRIMARY KEY,
|
|
profile_key VARCHAR(255) NOT NULL UNIQUE,
|
|
profile_type VARCHAR(50),
|
|
profile_id VARCHAR(255),
|
|
canonical_url TEXT NOT NULL,
|
|
full_name TEXT,
|
|
status VARCHAR(32) NOT NULL DEFAULT 'active',
|
|
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
dismissed_at TIMESTAMPTZ,
|
|
parser_version VARCHAR(32),
|
|
current_data JSONB,
|
|
current_checksum VARCHAR(64),
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS ix_employees_full_name ON employees (full_name);
|
|
CREATE INDEX IF NOT EXISTS ix_employees_status ON employees (status);
|
|
|
|
CREATE TABLE IF NOT EXISTS employee_snapshots (
|
|
id SERIAL PRIMARY KEY,
|
|
employee_id INTEGER NOT NULL REFERENCES employees(id),
|
|
crawl_run_id INTEGER REFERENCES crawl_runs(id),
|
|
parsed_data JSONB NOT NULL,
|
|
html_snapshot BYTEA,
|
|
checksum VARCHAR(64) NOT NULL,
|
|
parser_version VARCHAR(32),
|
|
captured_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS ix_employee_snapshots_employee_id ON employee_snapshots (employee_id);
|
|
|
|
CREATE TABLE IF NOT EXISTS crawl_errors (
|
|
id SERIAL PRIMARY KEY,
|
|
crawl_run_id INTEGER NOT NULL REFERENCES crawl_runs(id),
|
|
profile_url TEXT,
|
|
error_type VARCHAR(255) NOT NULL,
|
|
message TEXT NOT NULL,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS ix_crawl_errors_run_id ON crawl_errors (crawl_run_id);
|
|
|
|
CREATE TABLE IF NOT EXISTS profile_tabs (
|
|
id SERIAL PRIMARY KEY,
|
|
employee_id INTEGER NOT NULL REFERENCES employees(id),
|
|
title TEXT NOT NULL,
|
|
href TEXT NOT NULL,
|
|
data_index VARCHAR(64)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS ix_profile_tabs_employee_id ON profile_tabs (employee_id);
|
|
|
|
CREATE TABLE IF NOT EXISTS parse_resource_cache (
|
|
id SERIAL PRIMARY KEY,
|
|
profile_key VARCHAR(255) NOT NULL,
|
|
resource_key VARCHAR(255) NOT NULL,
|
|
method VARCHAR(16) NOT NULL,
|
|
url TEXT NOT NULL,
|
|
request_fingerprint VARCHAR(64) NOT NULL,
|
|
etag TEXT,
|
|
last_modified TEXT,
|
|
body_hash VARCHAR(64) NOT NULL,
|
|
body_snapshot BYTEA NOT NULL,
|
|
parser_version VARCHAR(32),
|
|
fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
|
CONSTRAINT uq_parse_resource_cache_resource UNIQUE (profile_key, resource_key, request_fingerprint)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS ix_parse_resource_cache_profile_key
|
|
ON parse_resource_cache (profile_key);
|