148 lines
4.5 KiB
Python
148 lines
4.5 KiB
Python
from __future__ import annotations
|
|
|
|
import gzip
|
|
import hashlib
|
|
import json
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import requests
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.models import ParseResourceCache
|
|
from app.version import BACKEND_VERSION
|
|
|
|
|
|
@dataclass(frozen=True)
class CachedResource:
    """Immutable outcome of fetching one resource as text.

    Attributes:
        text: The resource body as text.
        body_hash: Hash string identifying the body content.
        from_cache: ``True`` when the text was taken from a stored snapshot
            instead of the body of the live response.
        status_code: HTTP status code of the response that was received.
    """

    text: str
    body_hash: str
    from_cache: bool
    status_code: int
|
|
|
|
|
|
class ResourceCache:
    """Fetch HTTP resources as text, revalidating against stored snapshots.

    A stored ``ParseResourceCache`` row is selected by the profile key, the
    resource key and a fingerprint of the request (method, URL, JSON payload
    and query parameters). The row holds a gzip-compressed snapshot of the
    body plus the ``ETag`` / ``Last-Modified`` values of the response it
    came from.

    The class only flushes the database session; it never commits.
    """

    def __init__(self, db: Session):
        """Keep *db* as the session used for every lookup and write."""
        self.db = db

    def _find_entry(self, profile_key: str, resource_key: str, fingerprint: str):
        """Return the stored row matching the three keys, or ``None``."""
        query = select(ParseResourceCache).where(
            ParseResourceCache.profile_key == profile_key,
            ParseResourceCache.resource_key == resource_key,
            ParseResourceCache.request_fingerprint == fingerprint,
        )
        return self.db.scalar(query)

    def fetch_text(
        self,
        session: requests.Session,
        *,
        profile_key: str,
        resource_key: str,
        method: str,
        url: str,
        headers: dict[str, str],
        timeout: int,
        json_payload: Any | None = None,
        params: dict[str, Any] | None = None,
    ) -> CachedResource:
        """Fetch *url* and return its body, reusing the snapshot on HTTP 304.

        Args:
            session: HTTP session the request is sent through.
            profile_key: First part of the stored row's lookup key.
            resource_key: Second part of the stored row's lookup key.
            method: HTTP method; upper-cased before use.
            url: Target URL.
            headers: Base request headers. The mapping is copied, so the
                caller's dict is not modified.
            timeout: Timeout value forwarded to the HTTP call.
            json_payload: Optional JSON body; part of the request fingerprint.
            params: Optional query parameters; part of the request fingerprint.

        Returns:
            A ``CachedResource`` with ``from_cache=True`` when the server
            answered 304 and a stored row exists, otherwise one built from
            the live response body (which is also stored).

        Raises:
            Whatever ``response.raise_for_status()`` raises for the received
            response (``requests.HTTPError`` on 4xx/5xx for a real
            ``requests.Response``).
        """
        method = method.upper()
        fingerprint = _request_fingerprint(
            method=method,
            url=url,
            json_payload=json_payload,
            params=params,
        )
        entry = self._find_entry(profile_key, resource_key, fingerprint)

        # Work on a copy so the validators never leak into the caller's dict.
        outgoing_headers = dict(headers)
        if entry:
            for header_name, validator in (
                ("If-None-Match", entry.etag),
                ("If-Modified-Since", entry.last_modified),
            ):
                if validator:
                    outgoing_headers[header_name] = validator

        response = _send(
            session,
            method=method,
            url=url,
            headers=outgoing_headers,
            timeout=timeout,
            json_payload=json_payload,
            params=params,
        )

        if response.status_code == 304 and entry:
            # Not modified: refresh the fetch timestamp and serve the snapshot.
            entry.fetched_at = datetime.now(timezone.utc)
            self.db.flush()
            snapshot_text = gzip.decompress(entry.body_snapshot).decode("utf-8")
            return CachedResource(
                text=snapshot_text,
                body_hash=entry.body_hash,
                from_cache=True,
                status_code=response.status_code,
            )

        # NOTE(review): a 304 received while no row is stored falls through to
        # here; requests' raise_for_status() only rejects 4xx/5xx, so the
        # (presumably empty) body would be stored as fresh content - confirm
        # whether callers can ever trigger that.
        response.raise_for_status()
        text = response.text
        body_hash = _body_hash(text)

        # Response objects lacking a ``headers`` attribute yield no validators.
        if hasattr(response, "headers"):
            etag = response.headers.get("ETag")
            last_modified = response.headers.get("Last-Modified")
        else:
            etag = None
            last_modified = None

        compressed_body = gzip.compress(text.encode("utf-8"))
        fetched_at = datetime.now(timezone.utc)

        if entry:
            # Overwrite the existing row in place with the fresh response.
            refreshed_fields = (
                ("method", method),
                ("url", url),
                ("etag", etag),
                ("last_modified", last_modified),
                ("body_hash", body_hash),
                ("body_snapshot", compressed_body),
                ("parser_version", BACKEND_VERSION),
                ("fetched_at", fetched_at),
            )
            for attribute, value in refreshed_fields:
                setattr(entry, attribute, value)
        else:
            new_row = ParseResourceCache(
                profile_key=profile_key,
                resource_key=resource_key,
                method=method,
                url=url,
                request_fingerprint=fingerprint,
                etag=etag,
                last_modified=last_modified,
                body_hash=body_hash,
                body_snapshot=compressed_body,
                parser_version=BACKEND_VERSION,
                fetched_at=fetched_at,
            )
            self.db.add(new_row)

        self.db.flush()
        return CachedResource(
            text=text,
            body_hash=body_hash,
            from_cache=False,
            status_code=response.status_code,
        )
|
|
|
|
|
|
def _send(
|
|
session: requests.Session,
|
|
*,
|
|
method: str,
|
|
url: str,
|
|
headers: dict[str, str],
|
|
timeout: int,
|
|
json_payload: Any | None,
|
|
params: dict[str, Any] | None,
|
|
) -> requests.Response:
|
|
if method == "POST":
|
|
return session.post(url, json=json_payload, headers=headers, timeout=timeout, params=params)
|
|
return session.get(url, headers=headers, timeout=timeout, params=params)
|
|
|
|
|
|
def _request_fingerprint(
|
|
*,
|
|
method: str,
|
|
url: str,
|
|
json_payload: Any | None,
|
|
params: dict[str, Any] | None,
|
|
) -> str:
|
|
payload = {
|
|
"method": method,
|
|
"url": url,
|
|
"json": json_payload,
|
|
"params": params,
|
|
}
|
|
encoded = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
|
|
return hashlib.sha256(encoded.encode("utf-8")).hexdigest()
|
|
|
|
|
|
def _body_hash(text: str) -> str:
|
|
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|