from bs4 import BeautifulSoup
from requests import Session

from app.parser.profile_url import normalize_profile_url


def collect_profile_links(
    session: Session,
    source_url: str,
    headers: dict[str, str],
    timeout: int,
) -> list[str]:
    """Fetch a page and return the distinct normalized links it contains.

    The page at ``source_url`` is downloaded through ``session``, parsed with
    the ``html.parser`` backend, and every ``<a>`` tag carrying an ``href``
    attribute is passed through ``normalize_profile_url``. Falsy results are
    skipped, and repeated results are kept only at their first occurrence.

    Args:
        session: Session used to perform the GET request.
        source_url: Address of the page to scan for links.
        headers: Request headers forwarded to ``session.get``.
        timeout: Timeout value forwarded to ``session.get``.

    Returns:
        Normalized links in the order they first appear in the document.

    Raises:
        Any exception raised by ``session.get`` or by the response's
        ``raise_for_status()`` is propagated unchanged.
    """
    page = session.get(source_url, headers=headers, timeout=timeout)
    page.raise_for_status()

    document = BeautifulSoup(page.text, "html.parser")
    candidates = (
        normalize_profile_url(tag["href"])
        for tag in document.find_all("a", href=True)
    )
    # dict keys keep insertion order, so this drops duplicates while
    # preserving the position of each link's first appearance.
    return list(dict.fromkeys(link for link in candidates if link))