feat: add employee news links parsing and storage

This commit is contained in:
Anton
2026-05-22 18:50:25 +03:00
parent 680ac6e980
commit 4d2a071ec0
19 changed files with 636 additions and 16 deletions

View File

@@ -42,6 +42,7 @@ class Employee(Base):
snapshots: Mapped[list["EmployeeSnapshot"]] = relationship(back_populates="employee")
tabs: Mapped[list["ProfileTab"]] = relationship(back_populates="employee", cascade="all, delete-orphan")
publications: Mapped[list["EmployeePublication"]] = relationship(back_populates="employee", cascade="all, delete-orphan")
news_links: Mapped[list["EmployeeNewsLink"]] = relationship(back_populates="employee", cascade="all, delete-orphan")
crawl_run_changes: Mapped[list["CrawlRunEmployeeChange"]] = relationship(back_populates="employee")
@@ -97,6 +98,32 @@ class EmployeePublication(Base):
employee: Mapped[Employee] = relationship(back_populates="publications")
class EmployeeNewsLink(Base):
__tablename__ = "employee_news_links"
__table_args__ = (
UniqueConstraint("employee_id", "url", name="uq_employee_news_links_employee_url"),
UniqueConstraint("employee_id", "source_hash", name="uq_employee_news_links_employee_source_hash"),
Index("ix_employee_news_links_employee_id", "employee_id"),
Index("ix_employee_news_links_url", "url"),
Index("ix_employee_news_links_published_at", "published_at"),
Index("ix_employee_news_links_published_year", "published_year"),
)
id: Mapped[int] = mapped_column(Integer, primary_key=True)
employee_id: Mapped[int] = mapped_column(ForeignKey("employees.id", ondelete="CASCADE"), nullable=False)
title: Mapped[str] = mapped_column(Text, nullable=False)
url: Mapped[str | None] = mapped_column(Text)
summary: Mapped[str | None] = mapped_column(Text)
published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
published_year: Mapped[int | None] = mapped_column(Integer)
source_hash: Mapped[str] = mapped_column(String(64), nullable=False)
raw_data: Mapped[dict | None] = mapped_column(json_type)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, nullable=False)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow, nullable=False)
employee: Mapped[Employee] = relationship(back_populates="news_links")
class CrawlRun(Base):
__tablename__ = "crawl_runs"