Files
miem_workers/app/models.py
2026-05-14 11:00:46 +03:00

177 lines
8.8 KiB
Python

from datetime import datetime, timezone
from sqlalchemy import DateTime, ForeignKey, Index, Integer, LargeBinary, String, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.types import JSON
from app.db import Base
def utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
# Column type shared by the JSON columns in this module: generic JSON on every
# dialect, swapped for JSONB when the dialect is PostgreSQL.
json_type = JSON().with_variant(
    JSONB(),
    "postgresql",
)
class Employee(Base):
    """ORM model mapped to the ``employees`` table.

    ``profile_key`` is unique across rows (``uq_employees_profile_key``), and
    ``full_name`` and ``status`` each have their own index.
    """

    __tablename__ = "employees"
    __table_args__ = (
        UniqueConstraint("profile_key", name="uq_employees_profile_key"),
        Index("ix_employees_full_name", "full_name"),
        Index("ix_employees_status", "status"),
    )

    # --- identity -----------------------------------------------------------
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    profile_type: Mapped[str | None] = mapped_column(String(50))
    profile_id: Mapped[str | None] = mapped_column(String(255))
    canonical_url: Mapped[str] = mapped_column(Text, nullable=False)
    full_name: Mapped[str | None] = mapped_column(Text)

    # --- lifecycle ----------------------------------------------------------
    # ``status`` falls back to "active" when no value is supplied.
    status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="active"
    )
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
    dismissed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    # --- latest parsed payload ----------------------------------------------
    parser_version: Mapped[str | None] = mapped_column(String(32))
    current_data: Mapped[dict | None] = mapped_column(json_type)
    current_checksum: Mapped[str | None] = mapped_column(String(64))

    # --- row bookkeeping ----------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
    # ``onupdate`` makes the ORM write a fresh ``utcnow()`` on every UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
        onupdate=utcnow,
    )

    # --- relationships ------------------------------------------------------
    snapshots: Mapped[list["EmployeeSnapshot"]] = relationship(
        back_populates="employee"
    )
    # Tabs are owned by the employee: they are deleted with it and when they
    # are removed from this collection ("all, delete-orphan").
    tabs: Mapped[list["ProfileTab"]] = relationship(
        back_populates="employee",
        cascade="all, delete-orphan",
    )
    crawl_run_changes: Mapped[list["CrawlRunEmployeeChange"]] = relationship(
        back_populates="employee"
    )
class EmployeeSnapshot(Base):
    """ORM model mapped to the ``employee_snapshots`` table.

    Each row references one ``employees`` row and, optionally, one
    ``crawl_runs`` row. ``employee_id`` is indexed.
    """

    __tablename__ = "employee_snapshots"
    __table_args__ = (
        Index("ix_employee_snapshots_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Foreign keys; only the employee link is mandatory.
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"), nullable=False
    )
    crawl_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("crawl_runs.id")
    )

    # Captured payload.
    parsed_data: Mapped[dict] = mapped_column(json_type, nullable=False)
    html_snapshot: Mapped[bytes | None] = mapped_column(LargeBinary)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    parser_version: Mapped[str | None] = mapped_column(String(32))
    captured_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )

    # Counterpart of ``Employee.snapshots``.
    employee: Mapped[Employee] = relationship(back_populates="snapshots")
class CrawlRun(Base):
    """ORM model mapped to the ``crawl_runs`` table.

    Holds the source URL, a status string, start/finish timestamps and a set
    of integer counters that all start at zero.
    """

    __tablename__ = "crawl_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)

    # ``status`` falls back to "running" when no value is supplied.
    status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="running"
    )
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
    finished_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    # Per-run counters.
    found_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    parsed_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    new_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    error_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    dismissed_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )

    message: Mapped[str | None] = mapped_column(Text)

    # Reverse sides of ``CrawlRunEmployeeChange.crawl_run`` and
    # ``DatasetVersion.crawl_run``.
    employee_changes: Mapped[list["CrawlRunEmployeeChange"]] = relationship(
        back_populates="crawl_run"
    )
    dataset_versions: Mapped[list["DatasetVersion"]] = relationship(
        back_populates="crawl_run"
    )
class CrawlRunEmployeeChange(Base):
    """ORM model mapped to the ``crawl_run_employee_changes`` table.

    Every row belongs to a crawl run; the employee link is optional.
    ``crawl_run_id``, ``employee_id`` and ``change_type`` are each indexed.
    """

    __tablename__ = "crawl_run_employee_changes"
    __table_args__ = (
        Index("ix_crawl_run_employee_changes_run_id", "crawl_run_id"),
        Index("ix_crawl_run_employee_changes_employee_id", "employee_id"),
        Index("ix_crawl_run_employee_changes_change_type", "change_type"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Foreign keys.
    crawl_run_id: Mapped[int] = mapped_column(
        ForeignKey("crawl_runs.id"), nullable=False
    )
    employee_id: Mapped[int | None] = mapped_column(
        ForeignKey("employees.id")
    )

    # Profile details stored on the change row itself.
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    profile_url: Mapped[str] = mapped_column(Text, nullable=False)
    full_name: Mapped[str | None] = mapped_column(Text)

    change_type: Mapped[str] = mapped_column(String(32), nullable=False)
    # No explicit SQL type: the column type is derived from the annotation.
    profile_available: Mapped[bool | None] = mapped_column()
    message: Mapped[str | None] = mapped_column(Text)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )

    # Counterparts of ``CrawlRun.employee_changes`` and
    # ``Employee.crawl_run_changes``.
    crawl_run: Mapped[CrawlRun] = relationship(
        back_populates="employee_changes"
    )
    employee: Mapped[Employee | None] = relationship(
        back_populates="crawl_run_changes"
    )
class CrawlError(Base):
    """ORM model mapped to the ``crawl_errors`` table.

    Each row references a ``crawl_runs`` row through the indexed
    ``crawl_run_id`` column; no ORM relationship is declared here.
    """

    __tablename__ = "crawl_errors"
    __table_args__ = (
        Index("ix_crawl_errors_run_id", "crawl_run_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    crawl_run_id: Mapped[int] = mapped_column(
        ForeignKey("crawl_runs.id"), nullable=False
    )
    profile_url: Mapped[str | None] = mapped_column(Text)
    error_type: Mapped[str] = mapped_column(String(255), nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
class ProfileTab(Base):
    """ORM model mapped to the ``profile_tabs`` table.

    Each row belongs to exactly one employee; ``employee_id`` is indexed.
    """

    __tablename__ = "profile_tabs"
    __table_args__ = (
        Index("ix_profile_tabs_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"), nullable=False
    )
    title: Mapped[str] = mapped_column(Text, nullable=False)
    href: Mapped[str] = mapped_column(Text, nullable=False)
    data_index: Mapped[str | None] = mapped_column(String(64))

    # Counterpart of ``Employee.tabs``.
    employee: Mapped[Employee] = relationship(back_populates="tabs")
class ParserSource(Base):
    """ORM model mapped to the ``parser_sources`` table.

    ``source_url`` is unique (``uq_parser_sources_source_url``).
    """

    __tablename__ = "parser_sources"
    __table_args__ = (
        UniqueConstraint("source_url", name="uq_parser_sources_source_url"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)
    # New rows are enabled unless a value is supplied explicitly.
    enabled: Mapped[bool] = mapped_column(nullable=False, default=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )
class DatasetVersion(Base):
    """ORM model mapped to the ``dataset_versions`` table.

    ``hash`` is unique (``uq_dataset_versions_hash``) and ``created_at`` is
    indexed. A version may optionally reference a crawl run, and it owns its
    ``DatasetVersionItem`` rows.
    """

    __tablename__ = "dataset_versions"
    __table_args__ = (
        UniqueConstraint("hash", name="uq_dataset_versions_hash"),
        Index("ix_dataset_versions_created_at", "created_at"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    hash: Mapped[str] = mapped_column(String(64), nullable=False)
    previous_hash: Mapped[str | None] = mapped_column(String(64))
    crawl_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("crawl_runs.id")
    )

    # Counters, all starting at zero.
    employee_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    active_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )
    dismissed_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0
    )

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow
    )

    # Counterpart of ``CrawlRun.dataset_versions``.
    crawl_run: Mapped[CrawlRun | None] = relationship(
        back_populates="dataset_versions"
    )
    # Items are deleted with the version and when they are removed from this
    # collection ("all, delete-orphan").
    items: Mapped[list["DatasetVersionItem"]] = relationship(
        back_populates="dataset_version",
        cascade="all, delete-orphan",
    )
class DatasetVersionItem(Base):
    """ORM model mapped to the ``dataset_version_items`` table.

    A ``(dataset_version_id, profile_key)`` pair may appear only once
    (``uq_dataset_version_items_version_profile``). The employee link is
    optional.
    """

    __tablename__ = "dataset_version_items"
    __table_args__ = (
        UniqueConstraint(
            "dataset_version_id",
            "profile_key",
            name="uq_dataset_version_items_version_profile",
        ),
        # NOTE(review): this index is named "..._hash" but covers
        # ``dataset_version_id``. Confirm whether the name is intentional
        # before renaming it; a rename would need a schema migration.
        Index("ix_dataset_version_items_hash", "dataset_version_id"),
        Index("ix_dataset_version_items_profile_key", "profile_key"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    dataset_version_id: Mapped[int] = mapped_column(
        ForeignKey("dataset_versions.id"), nullable=False
    )
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    employee_id: Mapped[int | None] = mapped_column(
        ForeignKey("employees.id")
    )
    status: Mapped[str] = mapped_column(String(32), nullable=False)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)

    # Counterpart of ``DatasetVersion.items``.
    dataset_version: Mapped[DatasetVersion] = relationship(
        back_populates="items"
    )
    # One-directional: no ``back_populates`` is declared for this link.
    employee: Mapped[Employee | None] = relationship()