feature: add MIEM employees parser service with admin UI and MCP
This commit is contained in:
109
app/models.py
Normal file
109
app/models.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Index, Integer, LargeBinary, String, Text, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.types import JSON
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC.

    Passed by reference as the ``default`` / ``onupdate`` callable of the
    timestamp columns in this module.
    """
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
# Column type shared by the JSON payload columns in this module: the generic
# SQLAlchemy JSON type, swapped for JSONB when the dialect is "postgresql".
json_type = JSON().with_variant(JSONB, "postgresql")
|
||||
|
||||
|
||||
class Employee(Base):
    """ORM model for a row of the ``employees`` table.

    Each row carries a unique ``profile_key``, the canonical profile URL,
    a ``status`` string, several timezone-aware timestamps, and the most
    recent parsed payload (``current_data``) together with its checksum.
    Related rows are reachable through ``snapshots`` and ``tabs``.
    """

    __tablename__ = "employees"
    __table_args__ = (
        UniqueConstraint("profile_key", name="uq_employees_profile_key"),
        Index("ix_employees_full_name", "full_name"),
        Index("ix_employees_status", "status"),
    )

    # --- identity -------------------------------------------------------
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Required; uniqueness is enforced by uq_employees_profile_key above.
    profile_key: Mapped[str] = mapped_column(String(255), nullable=False)
    profile_type: Mapped[str | None] = mapped_column(String(50))
    profile_id: Mapped[str | None] = mapped_column(String(255))
    canonical_url: Mapped[str] = mapped_column(Text, nullable=False)
    full_name: Mapped[str | None] = mapped_column(Text)

    # --- lifecycle ------------------------------------------------------
    # New rows start as "active" unless a status is supplied explicitly.
    status: Mapped[str] = mapped_column(
        String(32),
        nullable=False,
        default="active",
    )
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    last_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    # Nullable; no default is configured, so it stays NULL until set.
    dismissed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # --- latest parsed state --------------------------------------------
    parser_version: Mapped[str | None] = mapped_column(String(32))
    current_data: Mapped[dict | None] = mapped_column(json_type)
    # NOTE(review): 64 characters would fit a SHA-256 hex digest — confirm
    # against the code that computes the checksum.
    current_checksum: Mapped[str | None] = mapped_column(String(64))

    # --- bookkeeping ----------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    # Filled on INSERT and refreshed by ``onupdate`` on every ORM UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
        onupdate=utcnow,
    )

    # --- relationships --------------------------------------------------
    # NOTE(review): unlike ``tabs``, no delete cascade is configured here,
    # while employee_snapshots.employee_id is NOT NULL. Deleting an Employee
    # that still has snapshots would therefore likely fail when the ORM
    # tries to null out the foreign key — confirm this is intended.
    snapshots: Mapped[list["EmployeeSnapshot"]] = relationship(
        back_populates="employee",
    )
    # Tabs are owned by the employee: removed with it, or when orphaned.
    tabs: Mapped[list["ProfileTab"]] = relationship(
        back_populates="employee",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
|
||||
class EmployeeSnapshot(Base):
    """ORM model for a row of the ``employee_snapshots`` table.

    Stores one parsed payload for an employee, optionally with the raw
    HTML bytes and a link to the crawl run that produced it.
    """

    __tablename__ = "employee_snapshots"
    __table_args__ = (
        Index("ix_employee_snapshots_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Required link to the owning employee row.
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )
    # Optional link to a crawl run; NULL is allowed.
    crawl_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("crawl_runs.id"),
    )

    parsed_data: Mapped[dict] = mapped_column(json_type, nullable=False)
    html_snapshot: Mapped[bytes | None] = mapped_column(LargeBinary)
    # NOTE(review): 64 characters would fit a SHA-256 hex digest — confirm
    # against the code that computes the checksum.
    checksum: Mapped[str] = mapped_column(String(64), nullable=False)
    parser_version: Mapped[str | None] = mapped_column(String(32))
    captured_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )

    # Counterpart of ``Employee.snapshots``.
    employee: Mapped[Employee] = relationship(back_populates="snapshots")
|
||||
|
||||
|
||||
class CrawlRun(Base):
    """ORM model for a row of the ``crawl_runs`` table.

    Records the source URL, a status string, start and finish timestamps,
    four integer counters and an optional free-text message.
    """

    __tablename__ = "crawl_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    source_url: Mapped[str] = mapped_column(Text, nullable=False)

    # New rows start as "running" unless a status is supplied explicitly.
    status: Mapped[str] = mapped_column(
        String(32),
        nullable=False,
        default="running",
    )
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    # Nullable; no default is configured, so it stays NULL until set.
    finished_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # Counters: all required and starting from zero.
    found_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    parsed_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    error_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    dismissed_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    message: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
|
||||
class CrawlError(Base):
    """ORM model for a row of the ``crawl_errors`` table.

    Every row belongs to a crawl run and stores an error type, a message
    and, optionally, the profile URL it relates to.
    """

    __tablename__ = "crawl_errors"
    __table_args__ = (
        Index("ix_crawl_errors_run_id", "crawl_run_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Required link to the crawl run this error belongs to.
    crawl_run_id: Mapped[int] = mapped_column(
        ForeignKey("crawl_runs.id"),
        nullable=False,
    )
    profile_url: Mapped[str | None] = mapped_column(Text)
    error_type: Mapped[str] = mapped_column(String(255), nullable=False)
    message: Mapped[str] = mapped_column(Text, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
|
||||
|
||||
|
||||
class ProfileTab(Base):
    """ORM model for a row of the ``profile_tabs`` table.

    Holds a title and an href for a tab attached to an employee, plus an
    optional ``data_index`` string.
    """

    __tablename__ = "profile_tabs"
    __table_args__ = (
        Index("ix_profile_tabs_employee_id", "employee_id"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Required link to the owning employee row.
    employee_id: Mapped[int] = mapped_column(
        ForeignKey("employees.id"),
        nullable=False,
    )
    title: Mapped[str] = mapped_column(Text, nullable=False)
    href: Mapped[str] = mapped_column(Text, nullable=False)
    data_index: Mapped[str | None] = mapped_column(String(64))

    # Counterpart of ``Employee.tabs``.
    employee: Mapped[Employee] = relationship(back_populates="tabs")
|
||||
|
||||
|
||||
class ParserSource(Base):
    """ORM model for a row of the ``parser_sources`` table.

    Each row is a unique source URL with an ``enabled`` flag and a
    creation timestamp.
    """

    __tablename__ = "parser_sources"
    __table_args__ = (
        UniqueConstraint("source_url", name="uq_parser_sources_source_url"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Required; uniqueness is enforced by uq_parser_sources_source_url above.
    source_url: Mapped[str] = mapped_column(Text, nullable=False)
    # Column type is taken from the ``Mapped[bool]`` annotation; new rows
    # are enabled unless a value is supplied explicitly.
    enabled: Mapped[bool] = mapped_column(nullable=False, default=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
|
||||
Reference in New Issue
Block a user