"""SQLAlchemy ORM models for employees, snapshots, tabs, crawl runs and parser sources."""

from datetime import datetime, timezone

from sqlalchemy import (
    DateTime,
    ForeignKey,
    Index,
    Integer,
    LargeBinary,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.types import JSON

from app.db import Base


def utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)


# Generic JSON on every backend, swapped for JSONB when the dialect is PostgreSQL.
json_type = JSON().with_variant(JSONB, "postgresql")


def _utc_timestamp(**column_kwargs) -> Mapped[datetime]:
    """Build a NOT NULL, timezone-aware ``DateTime`` column that defaults to ``utcnow``.

    A new column object is created on every call, so each mapped attribute
    gets its own. Extra keyword arguments (for example ``onupdate``) are
    passed straight through to ``mapped_column``.
    """
    return mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        nullable=False,
        **column_kwargs,
    )


class Employee(Base):
    """Row of the ``employees`` table; ``profile_key`` is unique across rows."""

    __tablename__ = "employees"
    __table_args__ = (
        UniqueConstraint("profile_key", name="uq_employees_profile_key"),
        Index("ix_employees_full_name", "full_name"),
        Index("ix_employees_status", "status"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Profile identity and location.
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    profile_type: Mapped[str | None] = mapped_column(String(50))
    profile_id: Mapped[str | None] = mapped_column(String(255))
    canonical_url: Mapped[str] = mapped_column(Text, nullable=False)
    full_name: Mapped[str | None] = mapped_column(Text)

    # New rows start out as "active".
    status: Mapped[str] = mapped_column(String(32), default="active", nullable=False)
    first_seen_at: Mapped[datetime] = _utc_timestamp()
    last_seen_at: Mapped[datetime] = _utc_timestamp()
    dismissed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # Most recent parsed payload and its checksum.
    parser_version: Mapped[str | None] = mapped_column(String(32))
    current_data: Mapped[dict | None] = mapped_column(json_type)
    current_checksum: Mapped[str | None] = mapped_column(String(64))

    # Row bookkeeping; ``updated_at`` is refreshed by the ORM on every UPDATE.
    created_at: Mapped[datetime] = _utc_timestamp()
    updated_at: Mapped[datetime] = _utc_timestamp(onupdate=utcnow)

    # No cascade is configured for snapshots.
    snapshots: Mapped[list["EmployeeSnapshot"]] = relationship(
        back_populates="employee",
    )
    # Tabs are owned by the employee: a tab removed from this collection is deleted.
    tabs: Mapped[list["ProfileTab"]] = relationship(
        back_populates="employee",
        cascade="all, delete-orphan",
    )


class EmployeeSnapshot(Base):
    """Row of ``employee_snapshots``: parsed data and optional raw HTML for one employee."""

    __tablename__ = "employee_snapshots"
    __table_args__ = (
        Index("ix_employee_snapshots_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )
    # Nullable: a snapshot does not have to be linked to a crawl run.
    crawl_run_id: Mapped[int | None] = mapped_column(ForeignKey("crawl_runs.id"))

    parsed_data: Mapped[dict] = mapped_column(json_type, nullable=False)
    html_snapshot: Mapped[bytes | None] = mapped_column(LargeBinary)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    parser_version: Mapped[str | None] = mapped_column(String(32))
    captured_at: Mapped[datetime] = _utc_timestamp()

    employee: Mapped[Employee] = relationship(back_populates="snapshots")


class CrawlRun(Base):
    """Row of ``crawl_runs``: status, timing and counters for one run over a source URL."""

    __tablename__ = "crawl_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)

    # New rows start out as "running"; ``finished_at`` stays NULL until set.
    status: Mapped[str] = mapped_column(String(32), default="running", nullable=False)
    started_at: Mapped[datetime] = _utc_timestamp()
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # Counters, all starting at zero.
    found_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    parsed_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    new_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    error_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    dismissed_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)

    message: Mapped[str | None] = mapped_column(Text)


class CrawlError(Base):
    """Row of ``crawl_errors``: one error recorded against a crawl run."""

    __tablename__ = "crawl_errors"
    __table_args__ = (
        Index("ix_crawl_errors_run_id", "crawl_run_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    crawl_run_id: Mapped[int] = mapped_column(
        ForeignKey("crawl_runs.id"),
        nullable=False,
    )
    profile_url: Mapped[str | None] = mapped_column(Text)
    error_type: Mapped[str] = mapped_column(String(255), nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime] = _utc_timestamp()


class ProfileTab(Base):
    """Row of ``profile_tabs``: a titled link belonging to one employee."""

    __tablename__ = "profile_tabs"
    __table_args__ = (
        Index("ix_profile_tabs_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )
    title: Mapped[str] = mapped_column(Text, nullable=False)
    href: Mapped[str] = mapped_column(Text, nullable=False)
    data_index: Mapped[str | None] = mapped_column(String(64))

    employee: Mapped[Employee] = relationship(back_populates="tabs")


class ParserSource(Base):
    """Row of ``parser_sources``: a unique source URL with an enabled flag."""

    __tablename__ = "parser_sources"
    __table_args__ = (
        UniqueConstraint("source_url", name="uq_parser_sources_source_url"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)
    # Column type is derived from the ``Mapped[bool]`` annotation.
    enabled: Mapped[bool] = mapped_column(default=True, nullable=False)
    created_at: Mapped[datetime] = _utc_timestamp()