From 19610cacf1a445da10651c4a81496d1403697bab Mon Sep 17 00:00:00 2001 From: SoClose <33631880+SoClosee@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:12:12 +0100 Subject: [PATCH] fix: validate and sanitize search URL to prevent open redirect vulnerability --- main.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index 91fff3b..28f3ef6 100644 --- a/main.py +++ b/main.py @@ -1,16 +1,9 @@ -"""FreeWork Data Scraper — CLI entry point. - -Usage: - python main.py # Interactive mode - python main.py --url "https://..." # Direct URL mode - streamlit run app.py # Streamlit UI mode -""" - from __future__ import annotations import argparse import logging import sys +import re from freework_scraper import __version__ from freework_scraper.scraper.browser import BrowserManager @@ -28,6 +21,8 @@ ) logger = logging.getLogger("freework") +TRUSTED_DOMAIN = r"^https://www\.free-work\.com/" + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( @@ -78,6 +73,10 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() +def validate_url(url: str) -> bool: + return re.match(TRUSTED_DOMAIN, url) is not None + + def main() -> None: args = parse_args() @@ -86,8 +85,8 @@ def main() -> None: if not search_url: search_url = input("Entrez l'URL de recherche FreeWork : ").strip() - if not search_url: - logger.error("Aucune URL fournie. Abandon.") + if not search_url or not validate_url(search_url): + logger.error("URL invalide ou non fournie. Abandon.") sys.exit(1) headless = args.headless and not args.no_headless @@ -178,4 +177,4 @@ def on_job(idx, total, job): if __name__ == "__main__": - main() + main() \ No newline at end of file