feat: adds crawl resource cache

This commit is contained in:
Anton
2026-05-14 12:21:44 +03:00
parent 5180b89b81
commit 6724b3f369
20 changed files with 1192 additions and 73 deletions

View File

@@ -13,6 +13,7 @@ CREATE TABLE IF NOT EXISTS crawl_runs (
finished_at TIMESTAMPTZ,
found_count INTEGER NOT NULL DEFAULT 0,
parsed_count INTEGER NOT NULL DEFAULT 0,
skipped_count INTEGER NOT NULL DEFAULT 0,
new_count INTEGER NOT NULL DEFAULT 0,
error_count INTEGER NOT NULL DEFAULT 0,
dismissed_count INTEGER NOT NULL DEFAULT 0,
@@ -73,3 +74,22 @@ CREATE TABLE IF NOT EXISTS profile_tabs (
);
-- Index on profile_tabs.employee_id; IF NOT EXISTS keeps the statement re-runnable.
CREATE INDEX IF NOT EXISTS ix_profile_tabs_employee_id
    ON profile_tabs (employee_id);
-- Resource cache table: at most one row per
-- (profile_key, resource_key, request_fingerprint), enforced by the
-- uq_parse_resource_cache_resource constraint below.
CREATE TABLE IF NOT EXISTS parse_resource_cache (
    id                  SERIAL       PRIMARY KEY,

    -- Columns describing which request the cached body belongs to.
    profile_key         VARCHAR(255) NOT NULL,
    resource_key        VARCHAR(255) NOT NULL,
    method              VARCHAR(16)  NOT NULL,
    url                 TEXT         NOT NULL,
    request_fingerprint VARCHAR(64)  NOT NULL,

    -- Both nullable and stored as plain text (last_modified is not a timestamp).
    -- NOTE(review): presumably the HTTP ETag / Last-Modified validators of the
    -- cached response -- confirm against the code that writes this table.
    etag                TEXT,
    last_modified       TEXT,

    -- Cached payload and its hash; both are mandatory.
    -- NOTE(review): 64 chars would fit a hex-encoded SHA-256 digest -- confirm.
    body_hash           VARCHAR(64)  NOT NULL,
    body_snapshot       BYTEA        NOT NULL,

    -- Optional (nullable) parser version tag.
    parser_version      VARCHAR(32),

    -- Filled in with now() when the writer does not supply a value.
    fetched_at          TIMESTAMPTZ  NOT NULL DEFAULT now(),

    CONSTRAINT uq_parse_resource_cache_resource
        UNIQUE (profile_key, resource_key, request_fingerprint)
);
-- Single-column index on parse_resource_cache.profile_key.
-- NOTE(review): the index backing uq_parse_resource_cache_resource already
-- leads with profile_key, so this index may be redundant -- check query plans
-- before deciding whether to keep it.
CREATE INDEX IF NOT EXISTS ix_parse_resource_cache_profile_key
    ON parse_resource_cache (profile_key);

View File

@@ -0,0 +1,21 @@
-- Add the skipped_count counter to crawl_runs.
-- IF NOT EXISTS makes the migration safe to re-run; NOT NULL DEFAULT 0 gives
-- rows that already exist a value of 0.
ALTER TABLE crawl_runs
    ADD COLUMN IF NOT EXISTS skipped_count INTEGER NOT NULL DEFAULT 0;
-- Resource cache table: at most one row per
-- (profile_key, resource_key, request_fingerprint), enforced by the
-- uq_parse_resource_cache_resource constraint below.
CREATE TABLE IF NOT EXISTS parse_resource_cache (
    id                  SERIAL       PRIMARY KEY,

    -- Columns describing which request the cached body belongs to.
    profile_key         VARCHAR(255) NOT NULL,
    resource_key        VARCHAR(255) NOT NULL,
    method              VARCHAR(16)  NOT NULL,
    url                 TEXT         NOT NULL,
    request_fingerprint VARCHAR(64)  NOT NULL,

    -- Both nullable and stored as plain text (last_modified is not a timestamp).
    -- NOTE(review): presumably the HTTP ETag / Last-Modified validators of the
    -- cached response -- confirm against the code that writes this table.
    etag                TEXT,
    last_modified       TEXT,

    -- Cached payload and its hash; both are mandatory.
    -- NOTE(review): 64 chars would fit a hex-encoded SHA-256 digest -- confirm.
    body_hash           VARCHAR(64)  NOT NULL,
    body_snapshot       BYTEA        NOT NULL,

    -- Optional (nullable) parser version tag.
    parser_version      VARCHAR(32),

    -- Filled in with now() when the writer does not supply a value.
    fetched_at          TIMESTAMPTZ  NOT NULL DEFAULT now(),

    CONSTRAINT uq_parse_resource_cache_resource
        UNIQUE (profile_key, resource_key, request_fingerprint)
);
-- Single-column index on parse_resource_cache.profile_key.
-- NOTE(review): the index backing uq_parse_resource_cache_resource already
-- leads with profile_key, so this index may be redundant -- check query plans
-- before deciding whether to keep it.
CREATE INDEX IF NOT EXISTS ix_parse_resource_cache_profile_key
    ON parse_resource_cache (profile_key);