feature: add MIEM employees parser service with admin UI and MCP

This commit is contained in:
Anton
2026-04-28 16:20:51 +03:00
parent 6480f31e8f
commit d512580960
29 changed files with 1883 additions and 0 deletions

19
app/parser/collector.py Normal file
View File

@@ -0,0 +1,19 @@
from bs4 import BeautifulSoup
from requests import Session
from app.parser.profile_url import normalize_profile_url
def collect_profile_links(
    session: Session,
    source_url: str,
    headers: dict[str, str],
    timeout: int,
) -> list[str]:
    """Fetch ``source_url`` and return the unique profile URLs it links to.

    The page is requested through ``session`` and parsed with the built-in
    ``html.parser`` backend. The ``href`` of every ``<a>`` element that has
    one is passed to ``normalize_profile_url``; falsy results are dropped and
    the remaining values are de-duplicated.

    Args:
        session: Session used to issue the GET request.
        source_url: Address of the page to scan for links.
        headers: HTTP headers sent with the request.
        timeout: Timeout forwarded to ``session.get``.

    Returns:
        Normalized URLs in the order they first appear on the page, each one
        listed once.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        HTTP status, plus any error propagated by ``session.get``.
    """
    page = session.get(source_url, headers=headers, timeout=timeout)
    page.raise_for_status()
    document = BeautifulSoup(page.text, "html.parser")

    # Lazily normalize each href in document order, one call per anchor.
    candidates = (
        normalize_profile_url(link["href"])
        for link in document.find_all("a", href=True)
    )

    # dict keys keep insertion order, so this removes repeats while
    # preserving the position of each URL's first occurrence.
    unique_links = dict.fromkeys(url for url in candidates if url)
    return list(unique_links)