111 lines
5.4 KiB
Python
111 lines
5.4 KiB
Python
from datetime import datetime, timezone
|
|
|
|
from sqlalchemy import DateTime, ForeignKey, Index, Integer, LargeBinary, String, Text, UniqueConstraint
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
from sqlalchemy.types import JSON
|
|
|
|
from app.db import Base
|
|
|
|
|
|
def utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
|
|
|
|
|
|
# Column type shared by the JSON payload columns in this module: the generic
# SQLAlchemy JSON type, swapped for JSONB when the dialect is PostgreSQL.
json_type = JSON().with_variant(JSONB, "postgresql")
|
|
|
|
|
|
class Employee(Base):
    """ORM model for the ``employees`` table.

    Each row is identified by ``id`` and carries a unique ``profile_key``.
    ``full_name`` and ``status`` are indexed. A row owns a collection of
    ``EmployeeSnapshot`` records and a collection of ``ProfileTab`` records.
    """

    __tablename__ = "employees"
    __table_args__ = (
        UniqueConstraint("profile_key", name="uq_employees_profile_key"),
        Index("ix_employees_full_name", "full_name"),
        Index("ix_employees_status", "status"),
    )

    # --- identity -----------------------------------------------------------
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    profile_type: Mapped[str | None] = mapped_column(String(50))
    profile_id: Mapped[str | None] = mapped_column(String(255))
    canonical_url: Mapped[str] = mapped_column(Text, nullable=False)
    full_name: Mapped[str | None] = mapped_column(Text)

    # --- lifecycle ----------------------------------------------------------
    # New rows start out as "active" unless a status is supplied.
    status: Mapped[str] = mapped_column(
        String(32),
        nullable=False,
        default="active",
    )
    # Both "seen" timestamps default to utcnow() when the row is inserted.
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    dismissed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # --- latest parsed state ------------------------------------------------
    parser_version: Mapped[str | None] = mapped_column(String(32))
    current_data: Mapped[dict | None] = mapped_column(json_type)
    # NOTE(review): presumably a digest of ``current_data`` - confirm with the
    # code that writes it.
    current_checksum: Mapped[str | None] = mapped_column(String(64))

    # --- bookkeeping --------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    # Set on insert and refreshed by the ORM on every UPDATE of the row.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
        onupdate=utcnow,
    )

    # --- relationships ------------------------------------------------------
    # NOTE(review): unlike ``tabs``, no cascade is configured for snapshots,
    # while EmployeeSnapshot.employee_id is NOT NULL. Deleting an Employee that
    # still has snapshots will presumably fail - confirm this is intended.
    snapshots: Mapped[list["EmployeeSnapshot"]] = relationship(
        back_populates="employee",
    )
    # Tabs are deleted together with their employee, and also when they are
    # removed from this collection.
    tabs: Mapped[list["ProfileTab"]] = relationship(
        back_populates="employee",
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
class EmployeeSnapshot(Base):
    """ORM model for the ``employee_snapshots`` table.

    A snapshot belongs to exactly one ``Employee`` (required foreign key,
    indexed) and may optionally reference a row in ``crawl_runs``.
    """

    __tablename__ = "employee_snapshots"
    __table_args__ = (
        Index("ix_employee_snapshots_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Owning employee (required) and the crawl run, if any, that is linked
    # to this snapshot.
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )
    crawl_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("crawl_runs.id"),
    )

    # Captured payload: the parsed JSON document is mandatory, the raw HTML
    # bytes are optional.
    parsed_data: Mapped[dict] = mapped_column(json_type, nullable=False)
    html_snapshot: Mapped[bytes | None] = mapped_column(LargeBinary)
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    parser_version: Mapped[str | None] = mapped_column(String(32))

    # Defaults to utcnow() when the row is inserted.
    captured_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )

    # Reverse side of Employee.snapshots.
    employee: Mapped[Employee] = relationship(back_populates="snapshots")
|
|
|
|
|
|
class CrawlRun(Base):
    """ORM model for the ``crawl_runs`` table.

    Stores the source URL of a run, its status, start/finish timestamps,
    a set of integer counters and an optional free-text message.
    """

    __tablename__ = "crawl_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)

    # A new run is recorded as "running" unless a status is supplied.
    status: Mapped[str] = mapped_column(
        String(32),
        nullable=False,
        default="running",
    )

    # ``started_at`` defaults to utcnow() on insert; ``finished_at`` has no
    # default and stays NULL until a value is assigned.
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    finished_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # Counters: all required, all starting at zero.
    found_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    parsed_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    new_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    error_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    dismissed_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    message: Mapped[str | None] = mapped_column(Text)
|
|
|
|
|
|
class CrawlError(Base):
    """ORM model for the ``crawl_errors`` table.

    Every row references a row in ``crawl_runs`` (required foreign key,
    indexed) and records an error type and message, plus an optional
    profile URL.
    """

    __tablename__ = "crawl_errors"
    __table_args__ = (
        Index("ix_crawl_errors_run_id", "crawl_run_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    crawl_run_id: Mapped[int] = mapped_column(
        ForeignKey("crawl_runs.id"),
        nullable=False,
    )

    # The profile URL is optional; error type and message are mandatory.
    profile_url: Mapped[str | None] = mapped_column(Text)
    error_type: Mapped[str] = mapped_column(String(255), nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)

    # Defaults to utcnow() when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
|
|
|
|
|
|
class ProfileTab(Base):
    """ORM model for the ``profile_tabs`` table.

    Each tab belongs to one ``Employee`` (required foreign key, indexed) and
    stores a title, an href and an optional ``data_index`` string.
    """

    __tablename__ = "profile_tabs"
    __table_args__ = (
        Index("ix_profile_tabs_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )

    title: Mapped[str] = mapped_column(Text, nullable=False)
    href: Mapped[str] = mapped_column(Text, nullable=False)
    data_index: Mapped[str | None] = mapped_column(String(64))

    # Reverse side of Employee.tabs.
    employee: Mapped[Employee] = relationship(back_populates="tabs")
|
|
|
|
|
|
class ParserSource(Base):
    """ORM model for the ``parser_sources`` table.

    Holds source URLs, each of which may appear only once, together with an
    ``enabled`` flag and a creation timestamp.
    """

    __tablename__ = "parser_sources"
    __table_args__ = (
        UniqueConstraint("source_url", name="uq_parser_sources_source_url"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)

    # No explicit column type is passed: it is derived from the Mapped[bool]
    # annotation. New sources are enabled unless stated otherwise.
    enabled: Mapped[bool] = mapped_column(nullable=False, default=True)

    # Defaults to utcnow() when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
|