feature: improve admin directory and crawl progress
This commit is contained in:
98
tests/test_admin_data.py
Normal file
98
tests/test_admin_data.py
Normal file
@@ -0,0 +1,98 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import CrawlRun, Employee
|
||||
from app.services.admin_data import employee_display_payload, list_employees_page, run_payload, stats_payload
|
||||
|
||||
|
||||
def test_employee_display_payload_extracts_common_fields(db_session):
    """Check the display payload built from a fully populated Employee.

    The employee carries one position, one e-mail, one publication and one
    course; the payload is expected to report each of them.
    """
    # Raw profile data handed to the model; the payload builder reads from it.
    profile_data = {
        "positions": ["Professor"],
        "hse_start_year": 2024,
        "contacts": {"emails": ["person@hse.ru"], "phones": ["+79990000000"], "address": "Moscow"},
        "sections": [
            {"type": "publications", "publications": [{"title": "Paper"}]},
            {"type": "courses_by_year", "courses": [{"title": "Course"}]},
        ],
    }
    person = Employee(
        profile_key="staff:person",
        canonical_url="https://www.hse.ru/staff/person",
        full_name="Person Name",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data=profile_data,
    )

    result = employee_display_payload(person)

    expected_fields = {
        "positions_text": "Professor",
        "email_text": "person@hse.ru",
        "publications_count": 1,
        "courses_count": 1,
    }
    for field, expected in expected_fields.items():
        assert result[field] == expected, field
|
||||
|
||||
|
||||
def test_list_employees_page_filters_sorts_and_paginates(db_session):
    """Seed one dismissed and one active employee, then list only active ones.

    The listing is requested with status="active", sorted by full_name
    ascending with a limit of 10; only "Alpha" is expected to come back.
    """
    dismissed_employee = Employee(
        profile_key="staff:b",
        canonical_url="https://www.hse.ru/staff/b",
        full_name="Beta",
        status="dismissed",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": []}},
    )
    db_session.add(dismissed_employee)

    active_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
        current_data={"contacts": {"emails": ["alpha@hse.ru"]}},
    )
    db_session.add(active_employee)
    db_session.commit()

    listing_options = {"status": "active", "sort": "full_name", "direction": "asc", "limit": 10}
    listing = list_employees_page(db_session, **listing_options)

    assert listing["total"] == 1
    first_item = listing["items"][0]
    assert first_item["full_name"] == "Alpha"
|
||||
|
||||
|
||||
def test_stats_payload_uses_latest_run_new_count(db_session):
    """Check the stats payload for one active employee and one completed run.

    The only stored crawl run has new_count=3, and the payload is expected to
    expose that number as "new_in_last_run".
    """
    active_employee = Employee(
        profile_key="staff:a",
        canonical_url="https://www.hse.ru/staff/a",
        full_name="Alpha",
        status="active",
        first_seen_at=datetime.now(timezone.utc),
        last_seen_at=datetime.now(timezone.utc),
    )
    db_session.add(active_employee)

    completed_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=3)
    db_session.add(completed_run)
    db_session.commit()

    stats = stats_payload(db_session)

    assert stats["total"] == 1
    assert stats["active"] == 1
    assert stats["new_in_last_run"] == 3
|
||||
|
||||
|
||||
def test_run_payload_calculates_progress():
    """Check the progress figures reported for a running crawl.

    With 10 found, 4 parsed and 1 errored, the payload is expected to report
    processed_count == 5 and progress_percent == 50.0.
    """
    run_fields = {
        "source_url": "https://miem.hse.ru/persons",
        "status": "running",
        "found_count": 10,
        "parsed_count": 4,
        "error_count": 1,
    }
    crawl = CrawlRun(**run_fields)

    summary = run_payload(crawl)

    assert summary["processed_count"] == 5
    assert summary["progress_percent"] == 50.0
|
||||
@@ -8,7 +8,8 @@ from sqlalchemy.pool import StaticPool
|
||||
from app.config import Settings, get_settings
|
||||
from app.db import Base, get_db
|
||||
from app.main import app
|
||||
from app.models import Employee
|
||||
from app.models import CrawlRun, Employee
|
||||
from app.security import SESSION_COOKIE, sign_session
|
||||
|
||||
|
||||
def test_health_returns_versions():
|
||||
@@ -17,7 +18,7 @@ def test_health_returns_versions():
|
||||
response = client.get("/api/health")
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.json()["backend_version"] == "0.1.0"
|
||||
assert response.json()["backend_version"] == "0.2.0"
|
||||
|
||||
|
||||
def test_mcp_requires_token_and_lists_tools():
|
||||
@@ -105,3 +106,54 @@ def test_mcp_search_employees_returns_matching_employee():
|
||||
assert "Сергеев Алексей Викторович" in response.json()["result"]["content"][0]["text"]
|
||||
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def test_api_employees_and_stats_require_admin_session():
    """Call /api/employees and /api/stats with a signed admin session cookie.

    An in-memory SQLite database is seeded with one active employee and one
    completed crawl run (new_count=1). The FastAPI dependencies for the DB
    session and the settings are overridden so the app uses that database and
    known credentials. Both endpoints are expected to answer 200 with the
    seeded data.
    """
    # StaticPool + check_same_thread=False keep a single shared connection so
    # the in-memory database is visible to every session created below.
    engine = create_engine(
        "sqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    db = Session()
    try:
        db.add(
            Employee(
                profile_key="staff:alpha",
                profile_type="staff",
                profile_id="alpha",
                canonical_url="https://www.hse.ru/staff/alpha",
                full_name="Alpha Person",
                status="active",
                first_seen_at=datetime.now(timezone.utc),
                last_seen_at=datetime.now(timezone.utc),
                current_data={"contacts": {"emails": ["alpha@hse.ru"]}, "sections": []},
            )
        )
        db.add(CrawlRun(source_url="https://miem.hse.ru/persons", status="completed", new_count=1))
        db.commit()
    finally:
        # Close the seeding session even if the insert/commit fails.
        db.close()

    settings = Settings(admin_username="admin", admin_password="password", session_secret="session-secret")

    def override_db():
        session = Session()
        try:
            yield session
        finally:
            session.close()

    app.dependency_overrides[get_db] = override_db
    app.dependency_overrides[get_settings] = lambda: settings
    try:
        client = TestClient(app)
        client.cookies.set(SESSION_COOKIE, sign_session("admin", settings))

        employees = client.get("/api/employees", params={"q": "Alpha", "has_email": True})
        stats = client.get("/api/stats")

        assert employees.status_code == 200
        assert employees.json()["total"] == 1
        assert stats.status_code == 200
        assert stats.json()["new_in_last_run"] == 1
    finally:
        # The overrides live on the shared app object; always remove them so a
        # failure here cannot leak this test's DB/settings into other tests.
        app.dependency_overrides.clear()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.models import Employee
|
||||
from app.services.crawler import _mark_dismissed
|
||||
from app.models import CrawlRun, Employee
|
||||
from app.services.crawler import _mark_dismissed, _upsert_employee
|
||||
|
||||
|
||||
def test_mark_dismissed_only_marks_missing_active_employees(db_session):
|
||||
@@ -32,3 +32,27 @@ def test_mark_dismissed_only_marks_missing_active_employees(db_session):
|
||||
gone = db_session.query(Employee).filter_by(profile_key="staff:gone").one()
|
||||
assert gone.status == "dismissed"
|
||||
assert gone.dismissed_at is not None
|
||||
|
||||
|
||||
def test_upsert_employee_increments_new_count_for_new_employee(db_session):
    """Upsert a previously unseen profile and expect the run's new_count to be 1."""
    crawl_run = CrawlRun(source_url="https://miem.hse.ru/persons", status="running")
    db_session.add(crawl_run)
    db_session.commit()

    # Parsed-profile dict passed to the upsert helper for a brand-new person.
    parsed_profile = {
        "source_url": "https://www.hse.ru/staff/newperson",
        "profile_type": "staff",
        "profile_id": "newperson",
        "full_name": "New Person",
        "tabs": [],
        "sections": [],
        "parser_version": "0.2.0",
        "_html": "<html></html>",
    }
    _upsert_employee(db_session, crawl_run, parsed_profile)
    db_session.commit()

    assert crawl_run.new_count == 1
|
||||
|
||||
Reference in New Issue
Block a user