#!/usr/bin/env python3
"""
Site Scanner v3.0 — website vulnerability scanner.
Author: qpus.su

Usage:
    python scanner.py https://example.com
    python scanner.py urls.txt
    python scanner.py https://site1.com https://site2.com --full
    python scanner.py qpus.su --full --json report.json
"""

import argparse
import base64
import io
import json
import re
import socket
import ssl
import sys
import threading
import time

# Force UTF-8 on the standard streams so report text containing non-ASCII
# characters is written with errors="replace" instead of failing on consoles
# that use a legacy code page. reconfigure() changes the encoding in place,
# which keeps the stream's existing buffering mode and also works for streams
# that expose no ``.buffer`` attribute (they are simply left untouched).
for _stream in (sys.stdout, sys.stderr):
    _encoding = (getattr(_stream, "encoding", None) or "").lower()
    _encoding = _encoding.replace("-", "").replace("_", "")
    if _encoding not in ("", "utf8") and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8", errors="replace")
del _stream, _encoding
import urllib.parse
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path

try:
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry
except ImportError:
    print("Required: pip install requests")
    sys.exit(1)

try:
    import dns.resolver
    HAS_DNS = True
except ImportError:
    HAS_DNS = False



class Severity(Enum):
    """Severity level attached to a finding, declared from INFO up to CRITICAL."""

    INFO = "INFO"
    LOW = "LOW"
    MEDIUM = "MEDIUM"
    HIGH = "HIGH"
    CRITICAL = "CRITICAL"

    @property
    def color(self):
        """Return the ANSI escape sequence used to print this severity."""
        # Numeric SGR foreground codes, keyed by the member's string value.
        sgr_codes = {
            "INFO": 94,
            "LOW": 96,
            "MEDIUM": 93,
            "HIGH": 91,
            "CRITICAL": 95,
        }
        return f"\033[{sgr_codes[self.value]}m"


class Confidence(Enum):
    """Likelihood that a finding is a genuine issue rather than a false positive.

    Confirmed — result of a deterministic check (header missing, cert expired,
                port open).
    Firm      — strong evidence, though edge cases exist (CSP weakness,
                cookie flags).
    Tentative — heuristic or indirect evidence; verify manually
                (DOM XSS static analysis, timing attacks, bucket guessing).
    """

    CONFIRMED = "Confirmed"
    FIRM = "Firm"
    TENTATIVE = "Tentative"

    @property
    def weight(self) -> float:
        """Return the factor by which a severity penalty is scaled in scoring.

        Confirmed findings count in full, Firm findings at 75%, and Tentative
        findings at 30% so they stay visible without dominating the score.
        """
        if self is Confidence.CONFIRMED:
            return 1.0
        if self is Confidence.FIRM:
            return 0.75
        return 0.3


# ANSI SGR escape sequences used to style terminal output.
RESET = "\033[0m"    # clear all styling
BOLD = "\033[1m"     # bold / increased intensity
GREEN = "\033[92m"   # bright green foreground
RED = "\033[91m"     # bright red foreground
GRAY = "\033[90m"    # bright black (gray) foreground
YELLOW = "\033[93m"  # bright yellow foreground


@dataclass
class Finding:
    """A single result produced by one of the scanner's checks."""
    category: str  # check group the finding belongs to, e.g. "SSL/TLS", "HTTP Headers"
    title: str  # short headline for the finding
    severity: Severity
    description: str  # explanation of what was observed
    recommendation: str = ""  # suggested remediation; empty when none is given
    evidence: str = ""  # raw data backing the finding (header value, status code, ...)
    confidence: Confidence = Confidence.CONFIRMED  # checks that pass nothing get CONFIRMED


@dataclass
class ScanResult:
    """Accumulates the findings and errors collected while scanning one URL."""
    url: str
    timestamp: str = ""
    duration: float = 0.0
    findings: list = field(default_factory=list)
    errors: list = field(default_factory=list)
    # Hash and size of the homepage body; _is_spa_response() compares against them.
    spa_hash: str = ""
    spa_size: int = 0
    _lock: threading.Lock = field(default_factory=threading.Lock, repr=False)

    def add(self, f: Finding):
        """Append *f* to ``findings`` while holding the instance lock."""
        self._lock.acquire()
        try:
            self.findings.append(f)
        finally:
            self._lock.release()

    @property
    def summary(self):
        """Return a dict mapping each severity value to its number of findings."""
        tally = {}
        for item in self.findings:
            label = item.severity.value
            tally[label] = tally.get(label, 0) + 1
        return tally


def _is_spa_response(resp: requests.Response, result: ScanResult, *, threshold: float = 0.95) -> bool:
    """Tell whether *resp* is the same SPA shell page as the homepage.

    True when the SHA-256 of the body equals ``result.spa_hash``, or when the
    body is an HTML response larger than 1000 bytes whose size differs from
    ``result.spa_size`` by less than ``1 - threshold`` (relative). Used to
    suppress false positives on SPA sites that serve index.html for every
    route. Always False when no homepage hash has been recorded.
    """
    import hashlib

    reference_hash = result.spa_hash
    if not reference_hash:
        return False

    payload = resp.content
    if hashlib.sha256(payload).hexdigest() == reference_hash:
        return True

    # Fuzzy match: near-identical size plus HTML content type (covers shells
    # that embed per-request values such as nonces).
    baseline = result.spa_size
    if baseline <= 0 or len(payload) <= 1000:
        return False
    if "text/html" not in resp.headers.get("Content-Type", ""):
        return False
    relative_diff = abs(len(payload) - baseline) / baseline
    return relative_diff < (1 - threshold)




class _RateLimiter:
    """Global rate limiter: max N concurrent requests + min delay between requests."""
    def __init__(self, max_concurrent: int = 5, min_delay: float = 0.1):
        self._sem = threading.Semaphore(max_concurrent)
        self._delay = min_delay
        self._lock = threading.Lock()
        self._last = 0.0

    def acquire(self):
        self._sem.acquire()
        with self._lock:
            now = time.time()
            wait = self._last + self._delay - now
            if wait > 0:
                time.sleep(wait)
            self._last = time.time()

    def release(self):
        self._sem.release()


# Global rate limiter — shared across all threads in a scan.
# Allows at most 5 requests in flight and spaces request starts >= 0.08 s apart.
_rate_limiter = _RateLimiter(max_concurrent=5, min_delay=0.08)


class _ThrottledSession(requests.Session):
    """requests.Session subclass whose every request passes through a rate limiter.

    Headers and mounted adapters are taken over from ``base_session``.
    """
    def __init__(self, base_session: requests.Session, limiter: _RateLimiter):
        super().__init__()
        self._limiter = limiter
        self.adapters = base_session.adapters.copy()
        self.headers.update(base_session.headers)

    def request(self, method, url, **kwargs):
        """Send the request while holding a limiter slot; the slot is always released."""
        limiter = self._limiter
        limiter.acquire()
        try:
            response = super().request(method, url, **kwargs)
        finally:
            limiter.release()
        return response


def make_session(throttle: bool = True) -> requests.Session:
    """Build the HTTP session used by the checks.

    The session retries up to twice (backoff factor 0.5) on 502/503/504 for
    both http:// and https://, and sends a desktop Chrome User-Agent. With
    ``throttle`` set it is wrapped so requests go through the global rate
    limiter; otherwise the plain session is returned.
    """
    session = requests.Session()
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(total=2, backoff_factor=0.5, status_forcelist=[502, 503, 504])
    )
    for prefix in ("https://", "http://"):
        session.mount(prefix, retrying_adapter)
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    )
    return _ThrottledSession(session, _rate_limiter) if throttle else session







# Name fragments that mark a cipher suite as weak. check_ssl() looks for each
# entry as a case-insensitive substring of the negotiated cipher's name.
WEAK_CIPHERS = {
    "RC4", "DES", "3DES", "NULL", "EXPORT", "anon", "MD5",
    "RC2", "IDEA", "SEED", "CAMELLIA128",
}

def _ssl_report_expiry(cert: dict, result: ScanResult) -> None:
    """Report how long the certificate remains valid (or that it has expired)."""
    expires = cert["notAfter"]
    days_left = (ssl.cert_time_to_seconds(expires) - time.time()) / 86400
    days = int(days_left)
    if days_left < 0:
        result.add(Finding("SSL/TLS", "Certificate expired", Severity.CRITICAL,
            f"SSL certificate expired on {expires}.",
            "Renew the certificate immediately.",
            evidence=f"notAfter: {expires} | Days: {days}",
            confidence=Confidence.CONFIRMED))
    elif days_left < 14:
        result.add(Finding("SSL/TLS", "Certificate expiring soon", Severity.HIGH,
            f"Certificate expires in {days} days ({expires}).",
            "Renew the certificate or set up auto-renewal.",
            evidence=f"notAfter: {expires} | Days left: {days}",
            confidence=Confidence.CONFIRMED))
    elif days_left < 30:
        result.add(Finding("SSL/TLS", "Certificate expires in < 30 days", Severity.MEDIUM,
            f"{days} days until expiration ({expires}).",
            "Schedule certificate renewal.",
            evidence=f"notAfter: {expires} | Days left: {days}",
            confidence=Confidence.CONFIRMED))
    else:
        result.add(Finding("SSL/TLS", "Certificate valid", Severity.INFO,
            f"Valid for {days} more days (until {expires}).",
            evidence=f"notAfter: {expires} | Days left: {days}",
            confidence=Confidence.CONFIRMED))


def _ssl_report_protocol(protocol, result: ScanResult) -> None:
    """Report the TLS protocol version negotiated by the verified handshake."""
    if protocol in ("TLSv1", "TLSv1.1"):
        result.add(Finding("SSL/TLS", f"Outdated protocol {protocol}", Severity.HIGH,
            f"{protocol} is considered insecure.",
            "Disable TLS 1.0/1.1, use TLS 1.2+.",
            evidence=f"Negotiated protocol: {protocol}",
            confidence=Confidence.CONFIRMED))
    elif protocol == "TLSv1.3":
        result.add(Finding("SSL/TLS", "TLS 1.3", Severity.INFO, "TLS 1.3 in use.",
            confidence=Confidence.CONFIRMED))
    else:
        result.add(Finding("SSL/TLS", f"Protocol: {protocol}", Severity.INFO, f"{protocol} in use.",
            confidence=Confidence.CONFIRMED))


def _ssl_report_cipher(cipher, result: ScanResult) -> None:
    """Report the negotiated cipher; *cipher* is the (name, protocol, bits) tuple or None."""
    if not cipher:
        return
    cipher_name, _, bits = cipher
    name_lower = cipher_name.lower()
    # sorted(): WEAK_CIPHERS is a set, so without it the reported order would
    # change from run to run.
    weak = sorted(w for w in WEAK_CIPHERS if w.lower() in name_lower)
    if weak:
        weak_str = ", ".join(weak)
        result.add(Finding("SSL/TLS", f"Weak cipher: {cipher_name}", Severity.HIGH,
            f"Cipher {cipher_name} ({bits}-bit) contains weak components: {weak_str}.",
            "Configure server to use strong ciphers only (AES-GCM, ChaCha20).",
            evidence=f"Cipher: {cipher_name} | Bits: {bits} | Weak: {weak_str}",
            confidence=Confidence.CONFIRMED))
    elif bits and bits < 128:
        result.add(Finding("SSL/TLS", f"Short cipher key: {bits}-bit", Severity.HIGH,
            f"Cipher {cipher_name} uses a {bits}-bit key.",
            "Use ciphers with at least 128-bit keys.",
            confidence=Confidence.CONFIRMED))
    else:
        result.add(Finding("SSL/TLS", f"Cipher: {cipher_name} ({bits}-bit)", Severity.INFO,
            f"Cipher: {cipher_name}.",
            confidence=Confidence.CONFIRMED))


def _ssl_report_san(cert: dict, host: str, result: ScanResult) -> None:
    """Report when *host* is covered by none of the certificate's SAN entries."""
    sans = [value for _type, value in cert.get("subjectAltName", [])]
    if not sans:
        return
    # Host names are case-insensitive; urlparse already lowercases the host.
    candidates = [s.lower() for s in sans]
    covered = host in candidates or any(
        s.startswith("*.") and host.endswith(s[1:]) for s in candidates
    )
    if not covered:
        shown = ", ".join(sans[:5])
        result.add(Finding("SSL/TLS", "Host not in certificate SAN", Severity.HIGH,
            f"Host {host} not found in SAN: {shown}.",
            "Issue a certificate that covers all used domains.",
            evidence=f"Host: {host} | SAN: {shown}",
            confidence=Confidence.CONFIRMED))


def _ssl_report_self_signed(cert: dict, result: ScanResult) -> None:
    """Report a certificate whose issuer CN equals its subject CN."""
    # Each RDN is a tuple of (key, value) pairs; only the first pair is used.
    issuer = dict(x[0] for x in cert.get("issuer", []))
    subject = dict(x[0] for x in cert.get("subject", []))
    issuer_cn = issuer.get("commonName", "")
    issuer_o = issuer.get("organizationName", "")
    subject_cn = subject.get("commonName", "")
    # Require a non-empty CN: two certificates that simply omit the CN would
    # otherwise compare equal ("" == "") and be flagged.
    if issuer_cn and issuer_cn == subject_cn and "let's encrypt" not in issuer_o.lower():
        result.add(Finding("SSL/TLS", "Possibly self-signed certificate", Severity.HIGH,
            f"Issuer CN ({issuer_cn}) matches Subject CN ({subject_cn}).",
            "Use a certificate from a trusted CA (Let's Encrypt, etc.).",
            evidence=f"Issuer: {issuer_cn} ({issuer_o}) | Subject: {subject_cn}",
            confidence=Confidence.FIRM))


def _ssl_probe_legacy_protocols(host: str, port: int, result: ScanResult) -> None:
    """Try TLS 1.0 and TLS 1.1 handshakes and report each one the server accepts."""
    import warnings
    for proto_name, proto_const in [("TLSv1", ssl.TLSVersion.TLSv1), ("TLSv1.1", ssl.TLSVersion.TLSv1_1)]:
        try:
            # Verification is off: only protocol acceptance is being tested.
            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            with warnings.catch_warnings():
                # Selecting these versions emits DeprecationWarning.
                warnings.simplefilter("ignore", DeprecationWarning)
                ctx.minimum_version = proto_const
                ctx.maximum_version = proto_const
            with socket.create_connection((host, port), timeout=5) as raw_sock:
                with ctx.wrap_socket(raw_sock, server_hostname=host):
                    result.add(Finding("SSL/TLS", f"Server supports {proto_name}", Severity.HIGH,
                        f"Server accepts {proto_name} connections.",
                        f"Disable {proto_name} support on the server.",
                        evidence=f"Successfully connected with {proto_name}",
                        confidence=Confidence.CONFIRMED))
        except (OSError, ValueError):
            # Best effort: a failed connection/handshake (ssl.SSLError and
            # timeouts are OSErrors) or a version the local SSL library refuses
            # to select is treated as "protocol not accepted".
            continue


def check_ssl(url: str, result: ScanResult):
    """Check the TLS setup of *url*: certificate expiry, protocol, cipher, SAN, issuer.

    A non-HTTPS URL yields a single "plain HTTP" finding. Otherwise one
    verified handshake is performed and its certificate, negotiated protocol
    and negotiated cipher are analysed; afterwards the server is probed for
    TLS 1.0/1.1 support over unverified connections.

    Handshake problems are recorded rather than raised: a certificate
    verification failure becomes a CRITICAL finding, any other error is
    appended to ``result.errors``.
    """
    parsed = urllib.parse.urlparse(url)

    if parsed.scheme != "https":
        result.add(Finding(
            "SSL/TLS", "Site uses plain HTTP",
            Severity.HIGH,
            "Site does not use HTTPS. All traffic is unencrypted.",
            "Configure an SSL certificate (Let's Encrypt) and redirect HTTP to HTTPS.",
            evidence=f"URL scheme: {parsed.scheme}://",
            confidence=Confidence.CONFIRMED
        ))
        return

    host = parsed.hostname
    try:
        # .port raises ValueError for a malformed or out-of-range port.
        port = parsed.port or 443
    except ValueError as e:
        result.errors.append(f"SSL check error: {e}")
        return
    if not host:
        # Without this guard the sockets below would be opened with host=None,
        # which resolves to the local machine instead of the scan target.
        result.errors.append(f"SSL check error: no hostname in URL {url!r}")
        return

    try:
        ctx = ssl.create_default_context()
        with socket.create_connection((host, port), timeout=10) as sock:
            with ctx.wrap_socket(sock, server_hostname=host) as ssock:
                cert = ssock.getpeercert()
                cipher = ssock.cipher()  # (name, protocol, bits)
                protocol = ssock.version()
        _ssl_report_expiry(cert, result)
        _ssl_report_protocol(protocol, result)
        _ssl_report_cipher(cipher, result)
        _ssl_report_san(cert, host, result)
        _ssl_report_self_signed(cert, result)
    except ssl.SSLCertVerificationError as e:
        result.add(Finding("SSL/TLS", "Certificate verification error", Severity.CRITICAL,
            f"Certificate validation failed: {e}",
            "Ensure the certificate is issued by a trusted CA and matches the domain.",
            evidence=f"SSL error: {str(e)[:200]}",
            confidence=Confidence.CONFIRMED))
    except Exception as e:
        result.errors.append(f"SSL check error: {e}")

    # Runs even after a failed verified handshake: a server with a bad
    # certificate can still be checked for legacy protocol support.
    _ssl_probe_legacy_protocols(host, port, result)




def _split_set_cookie_header(sc_header: str):
    """Split one Set-Cookie header into ``(name, attrs)``.

    ``attrs`` maps each lowercased attribute name (secure, httponly, path, ...)
    to its stripped value; flag attributes map to an empty string.
    """
    pair, *attr_parts = sc_header.split(";")
    name = pair.partition("=")[0].strip()
    attrs = {}
    for part in attr_parts:
        key, _, value = part.partition("=")
        key = key.strip().lower()
        if key:
            attrs[key] = value.strip()
    return name, attrs


def check_headers(session: requests.Session, url: str, result: ScanResult):
    """Check security headers, CSP, HSTS, leaking headers, cache-control and cookie flags.

    Fetches *url* (following redirects) and records findings on *result* based
    on the final response's headers. A failed request is appended to
    ``result.errors`` and no findings are produced.
    """
    try:
        resp = session.get(url, timeout=15, allow_redirects=True)
    except requests.RequestException as e:
        result.errors.append(f"Headers check error: {e}")
        return

    headers = resp.headers
    headers_lower = {k.lower(): v for k, v in headers.items()}
    security_headers = {
        "Strict-Transport-Security": {
            "severity": Severity.MEDIUM,
            "desc": "HSTS not configured — on first visit, browser may load site over HTTP.",
            "rec": "Add: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload"
        },
        "X-Content-Type-Options": {
            "severity": Severity.MEDIUM,
            "desc": "Browser may interpret files as a different MIME type (MIME sniffing).",
            "rec": "Add: X-Content-Type-Options: nosniff"
        },
        "Content-Security-Policy": {
            "severity": Severity.MEDIUM,
            "desc": "No CSP — increased XSS risk.",
            "rec": "Configure Content-Security-Policy to restrict script sources."
        },
        "X-XSS-Protection": {
            "severity": Severity.INFO,
            "desc": "X-XSS-Protection not set (deprecated — modern browsers ignore it, use CSP instead).",
            "rec": ""
        },
        "Referrer-Policy": {
            "severity": Severity.LOW,
            "desc": "No Referrer-Policy — URLs may leak on navigation.",
            "rec": "Add: Referrer-Policy: strict-origin-when-cross-origin"
        },
        "Permissions-Policy": {
            "severity": Severity.LOW,
            "desc": "Browser permissions not restricted (camera, mic, geolocation).",
            "rec": "Add Permissions-Policy to restrict browser API access."
        },
        "Cross-Origin-Opener-Policy": {
            "severity": Severity.INFO,
            "desc": "No COOP — cross-origin isolation not configured.",
            "rec": "Recommended: Cross-Origin-Opener-Policy: same-origin"
        },
        "Cross-Origin-Resource-Policy": {
            "severity": Severity.INFO,
            "desc": "No CORP — cross-origin resource isolation not configured.",
            "rec": "Recommended: Cross-Origin-Resource-Policy: same-origin"
        },
        "Cross-Origin-Embedder-Policy": {
            "severity": Severity.INFO,
            "desc": "No COEP — cross-origin embedder isolation not configured.",
            "rec": "Recommended: Cross-Origin-Embedder-Policy: require-corp"
        },
    }

    # --- Presence of the recommended security headers -----------------------
    present_headers_str = ", ".join(headers_lower)[:200]
    # Headers that need value validation, not just presence check; their
    # "present" finding is emitted by the dedicated sections below.
    _validated_headers = {"strict-transport-security", "x-content-type-options"}
    for header, info in security_headers.items():
        h_lower = header.lower()
        if h_lower not in headers_lower:
            result.add(Finding("HTTP Headers", f"Missing {header}",
                info["severity"], info["desc"], info["rec"],
                evidence=f"Response headers: {present_headers_str}",
                confidence=Confidence.CONFIRMED))
        elif h_lower not in _validated_headers:
            result.add(Finding("HTTP Headers", f"{header} present",
                Severity.INFO, f"{header} header is set.",
                evidence=f"{header}: {headers_lower[h_lower][:150]}",
                confidence=Confidence.CONFIRMED))

    # --- CSP weaknesses -----------------------------------------------------
    csp = headers_lower.get("content-security-policy", "")
    if csp:
        # Directive names and keywords are matched case-insensitively.
        csp_l = csp.lower()
        # Split on separators too, so a wildcard written as "*;" is still seen.
        csp_tokens = re.split(r"[\s;,]+", csp_l)
        csp_checks = [
            ("'unsafe-inline'" in csp_l, "unsafe-inline (allows inline scripts/styles)"),
            ("'unsafe-eval'" in csp_l, "unsafe-eval (allows eval())"),
            ("data:" in csp_l, "data: URI (XSS possible via data:)"),
            ("*" in csp_tokens, "wildcard (*) in directive"),
            ("default-src" not in csp_l and "script-src" not in csp_l,
             "missing default-src and script-src"),
            ("frame-ancestors" not in csp_l, "missing frame-ancestors (clickjacking)"),
            ("upgrade-insecure-requests" not in csp_l, "missing upgrade-insecure-requests"),
            ("base-uri" not in csp_l, "missing base-uri (base tag injection)"),
            ("form-action" not in csp_l, "missing form-action (forms can submit anywhere)"),
            ("object-src" not in csp_l and "'none'" not in csp_l,
             "missing object-src (Flash/Java plugins)"),
        ]
        csp_issues = [message for failed, message in csp_checks if failed]

        if csp_issues:
            result.add(Finding("CSP Analysis", f"CSP issues ({len(csp_issues)})", Severity.MEDIUM,
                "Weaknesses found: " + "; ".join(csp_issues) + ".",
                "Tighten CSP: remove unsafe-inline/unsafe-eval, add missing directives.",
                confidence=Confidence.FIRM))

    # --- HSTS value validation ----------------------------------------------
    hsts = headers_lower.get("strict-transport-security", "")
    if hsts:
        hsts_l = hsts.lower()
        # Directive names are case-insensitive and the value may be quoted.
        max_age_match = re.search(r'max-age="?(\d+)', hsts, re.IGNORECASE)
        if not max_age_match:
            result.add(Finding("HTTP Headers", "HSTS invalid: no max-age", Severity.HIGH,
                "Strict-Transport-Security header present but has no valid max-age directive. "
                "Browser will ignore it.",
                "Add: Strict-Transport-Security: max-age=31536000; includeSubDomains; preload",
                evidence=f"Strict-Transport-Security: {hsts[:200]}",
                confidence=Confidence.CONFIRMED))
        else:
            max_age = int(max_age_match.group(1))
            if max_age == 0:
                result.add(Finding("HTTP Headers", "HSTS max-age=0 (disabled)", Severity.HIGH,
                    "max-age=0 effectively disables HSTS.",
                    "Set max-age=31536000 (1 year) or higher.",
                    evidence=f"Strict-Transport-Security: {hsts[:200]}",
                    confidence=Confidence.CONFIRMED))
            else:
                result.add(Finding("HTTP Headers", "Strict-Transport-Security present",
                    Severity.INFO, "HSTS configured.",
                    evidence=f"Strict-Transport-Security: {hsts[:200]}",
                    confidence=Confidence.CONFIRMED))
                if max_age < 31536000:
                    result.add(Finding("HTTP Headers", f"HSTS max-age too short: {max_age}", Severity.LOW,
                        f"max-age={max_age} (less than 1 year).",
                        "Set max-age=31536000 (1 year) or higher.",
                        evidence=f"Strict-Transport-Security: {hsts[:200]}",
                        confidence=Confidence.CONFIRMED))
            if "includesubdomains" not in hsts_l:
                result.add(Finding("HTTP Headers", "HSTS without includeSubDomains", Severity.LOW,
                    "HSTS does not cover subdomains.",
                    "Add includeSubDomains to HSTS.",
                    confidence=Confidence.CONFIRMED))
            if "preload" not in hsts_l:
                result.add(Finding("HTTP Headers", "HSTS without preload", Severity.LOW,
                    "Site cannot be added to browser HSTS Preload List.",
                    "Add preload and submit to hstspreload.org.",
                    confidence=Confidence.CONFIRMED))

    # --- Headers that disclose implementation details -----------------------
    leak_headers = {
        "Server": "server software",
        "X-Powered-By": "server framework",
        "X-AspNet-Version": "ASP.NET version",
        "X-AspNetMvc-Version": "ASP.NET MVC version",
        "X-Generator": "generator/CMS",
        "X-Drupal-Cache": "Drupal CMS",
        "X-Varnish": "Varnish cache",
        "X-Runtime": "processing time (Ruby/Python)",
        "X-Debug-Token": "debug token (Symfony)",
        "X-Debug-Token-Link": "profiler link",
    }
    for hdr, what in leak_headers.items():
        val = headers.get(hdr)
        if val:
            sev = Severity.MEDIUM if "debug" in hdr.lower() else Severity.LOW
            result.add(Finding("HTTP Headers", f"Leaks: {hdr}: {val}", sev,
                f"Header {hdr} exposes {what}.",
                f"Remove or mask the {hdr} header.",
                evidence=f"{hdr}: {val[:150]}",
                confidence=Confidence.CONFIRMED))

    # --- Cache-Control ------------------------------------------------------
    cc = headers_lower.get("cache-control", "")
    cc_l = cc.lower()  # cache directives are case-insensitive
    if not cc:
        result.add(Finding("HTTP Headers", "Missing Cache-Control", Severity.LOW,
            "No Cache-Control set — proxies may cache the page.",
            "For sensitive pages: Cache-Control: no-store, no-cache, must-revalidate",
            confidence=Confidence.CONFIRMED))
    elif "no-store" not in cc_l and "private" not in cc_l:
        result.add(Finding("HTTP Headers", "Cache-Control without no-store/private", Severity.LOW,
            f"Cache-Control: {cc} — responses may be cached publicly.",
            "For sensitive pages, add no-store or private.",
            confidence=Confidence.CONFIRMED))

    # --- X-Content-Type-Options value validation ----------------------------
    xcto = headers_lower.get("x-content-type-options", "")
    if xcto:
        if xcto.strip().lower() == "nosniff":
            result.add(Finding("HTTP Headers", "X-Content-Type-Options present",
                Severity.INFO, "X-Content-Type-Options: nosniff — MIME sniffing blocked.",
                evidence=f"X-Content-Type-Options: {xcto}",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("HTTP Headers", f"Invalid X-Content-Type-Options: {xcto}", Severity.MEDIUM,
                f"Value '{xcto.strip()}' is not recognized by browsers. Must be exactly 'nosniff'.",
                "Set: X-Content-Type-Options: nosniff",
                evidence=f"X-Content-Type-Options: {xcto}",
                confidence=Confidence.CONFIRMED))

    # --- Cookie attributes --------------------------------------------------
    # Prefer the raw header list, which keeps each Set-Cookie separate; fall
    # back to the (possibly merged) value from the response headers.
    getlist = getattr(getattr(resp.raw, "headers", None), "getlist", None)
    set_cookie_headers = getlist("Set-Cookie") if callable(getlist) else []
    if not set_cookie_headers:
        sc = headers.get("Set-Cookie")
        if sc:
            set_cookie_headers = [sc]

    seen_cookies = set()
    for sc_header in set_cookie_headers:
        # Attributes are parsed rather than substring-matched: a cookie name or
        # value containing e.g. "secure" must not count as the Secure flag.
        name, attrs = _split_set_cookie_header(sc_header)
        if name in seen_cookies:
            continue
        seen_cookies.add(name)
        issues = [
            label
            for label, key in (("Secure", "secure"), ("HttpOnly", "httponly"), ("SameSite", "samesite"))
            if key not in attrs
        ]

        if issues:
            result.add(Finding("Cookies", f"Cookie '{name}' missing: {', '.join(issues)}", Severity.MEDIUM,
                f"Cookie {name} lacks attributes: {', '.join(issues)}.",
                f"Set {', '.join(issues)} attributes on this cookie.",
                evidence=f"Set-Cookie: {sc_header[:150]}",
                confidence=Confidence.CONFIRMED))

        if name.startswith("__Host-"):
            if "secure" not in attrs or attrs.get("path") != "/" or "domain" in attrs:
                result.add(Finding("Cookies", f"__Host- cookie '{name}' misconfigured", Severity.MEDIUM,
                    "__Host- cookie must have Secure, Path=/ and no Domain.",
                    "Fix __Host- cookie attributes.",
                    evidence=f"Set-Cookie: {sc_header[:150]}",
                    confidence=Confidence.CONFIRMED))
        elif name.startswith("__Secure-"):
            if "secure" not in attrs:
                result.add(Finding("Cookies", f"__Secure- cookie '{name}' without Secure", Severity.MEDIUM,
                    "__Secure- cookie must have the Secure attribute.",
                    "Add Secure to __Secure- cookie.",
                    evidence=f"Set-Cookie: {sc_header[:150]}",
                    confidence=Confidence.CONFIRMED))




def check_http_redirect(session: requests.Session, url: str, result: ScanResult):
    """Check whether the plain-HTTP variant of an HTTPS *url* redirects to HTTPS.

    Does nothing for non-HTTPS URLs. Otherwise the http:// form of the URL is
    requested without following redirects and a finding is recorded for: a
    redirect to HTTPS (plus a LOW finding when it is not permanent), a
    redirect elsewhere, or no redirect at all. A relative ``Location`` is
    resolved by following the redirect chain. Request failures are appended to
    ``result.errors``.
    """
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme != "https":
        return

    # Rebuild the URL from its parsed parts: urlparse lowercases the scheme, so
    # this also handles inputs such as "HTTPS://host" that a case-sensitive
    # prefix replacement would leave pointing at HTTPS.
    http_url = parsed._replace(scheme="http").geturl()

    def report_not_https(target: str, evidence: str) -> None:
        result.add(Finding("Redirects", "HTTP redirect not to HTTPS", Severity.MEDIUM,
            f"HTTP redirects to {target}, not HTTPS.",
            "Configure HTTP->HTTPS redirect.",
            evidence=evidence,
            confidence=Confidence.CONFIRMED))

    try:
        resp = session.get(http_url, timeout=10, allow_redirects=False)
    except requests.RequestException as e:
        # E.g. nothing listening on the HTTP port; recorded like the other
        # checks' request failures instead of being dropped silently.
        result.errors.append(f"HTTP redirect check error: {e}")
        return

    status = resp.status_code
    if status not in (301, 302, 303, 307, 308):
        result.add(Finding("Redirects", "No HTTP->HTTPS redirect", Severity.HIGH,
            f"HTTP version returns {status} without redirect.",
            "Configure HTTP->HTTPS redirect (301).",
            evidence=f"HTTP {status} | No Location header",
            confidence=Confidence.CONFIRMED))
        return

    location = resp.headers.get("Location", "")
    location_evidence = f"HTTP {status} | Location: {location[:150]}"
    location_l = location.lower()  # URL schemes are case-insensitive

    if location_l.startswith("https://"):
        result.add(Finding("Redirects", "HTTP->HTTPS redirect configured", Severity.INFO,
            f"HTTP redirects to {location}.",
            evidence=location_evidence,
            confidence=Confidence.CONFIRMED))
        # 301 and 308 are both permanent redirects; only temporary ones are flagged.
        if status not in (301, 308):
            result.add(Finding("Redirects", f"Redirect {status} instead of 301", Severity.LOW,
                "301 (Permanent) is recommended for HTTP->HTTPS.",
                "Change to 301.",
                evidence=location_evidence,
                confidence=Confidence.CONFIRMED))
    elif not location_l.startswith("http"):
        # Relative (or empty) Location — follow the chain to see whether it
        # ends up on HTTPS.
        try:
            resp2 = session.get(http_url, timeout=10, allow_redirects=True)
        except requests.RequestException:
            report_not_https(location, location_evidence)
            return
        final_url = resp2.url
        chain_evidence = f"HTTP {status} -> {location} -> {final_url[:100]}"
        if final_url.startswith("https://"):
            result.add(Finding("Redirects", "HTTP->HTTPS redirect configured", Severity.INFO,
                f"HTTP redirects to HTTPS (via {location}).",
                evidence=chain_evidence,
                confidence=Confidence.CONFIRMED))
        else:
            report_not_https(final_url, chain_evidence)
    else:
        # Absolute http:// target.
        report_not_https(location, location_evidence)




def check_sensitive_paths(session: requests.Session, url: str, result: ScanResult):
    """Probe a fixed table of well-known sensitive files and directories.

    Every path in the table is requested relative to ``url`` (redirects are
    not followed) and the answer is classified:

    * HTTP 200 with a non-empty body produces a finding, unless the response
      is flagged by ``_is_spa_response``, matches the soft-404 baseline, is
      HTML served for a path with a non-HTML extension, or is a generic path
      whose HTML body contains an access-denied / login phrase or is about
      the size of the SPA shell.
    * HTTP 403 produces an informational finding.
    * Any other status, and any failed request, is ignored.

    Findings are added in the order of the path table, so two scans of an
    unchanged site yield identically ordered reports.

    Args:
        session: HTTP session used for every probe.
        url: Base URL of the target; a trailing slash is ignored.
        result: Scan result that receives the findings.
    """
    import hashlib

    # (relative path, description, severity name)
    sensitive = [
        (".env", "Environment variables — passwords and keys", "CRITICAL"),
        (".env.bak", "Backup .env", "CRITICAL"),
        (".env.old", "Old .env", "CRITICAL"),
        (".env.local", "Local .env", "CRITICAL"),
        (".env.production", "Production .env", "CRITICAL"),
        (".git/config", "Git configuration", "CRITICAL"),
        (".git/HEAD", "Git HEAD", "CRITICAL"),
        (".git/logs/HEAD", "Git reflog — commit history", "CRITICAL"),
        (".gitignore", "Gitignore — reveals project structure", "LOW"),
        (".svn/entries", "SVN metadata", "HIGH"),
        (".hg/dirstate", "Mercurial metadata", "HIGH"),
        (".bzr/README", "Bazaar repository", "HIGH"),
        (".htaccess", "Apache config", "MEDIUM"),
        (".htpasswd", "Apache passwords", "CRITICAL"),
        ("wp-config.php", "WordPress config", "CRITICAL"),
        ("wp-config.php.bak", "WordPress config backup", "CRITICAL"),
        ("wp-config.php~", "WordPress editor backup", "CRITICAL"),
        ("configuration.php", "Joomla config", "CRITICAL"),
        ("config.php", "PHP config", "HIGH"),
        ("config.yml", "YAML config", "HIGH"),
        ("config.yaml", "YAML config", "HIGH"),
        ("config.json", "JSON config", "HIGH"),
        ("config.xml", "XML config", "HIGH"),
        ("config.inc.php", "PHP include config", "CRITICAL"),
        ("settings.py", "Django settings", "CRITICAL"),
        ("local_settings.py", "Django local settings", "CRITICAL"),
        ("web.config", "IIS/ASP.NET config", "HIGH"),
        ("appsettings.json", "ASP.NET config", "HIGH"),
        ("appsettings.Development.json", "ASP.NET dev config", "CRITICAL"),
        ("database.yml", "Database config (Rails)", "CRITICAL"),
        (".env.development", "Dev environment variables", "CRITICAL"),
        ("composer.json", "PHP dependencies", "LOW"),
        ("composer.lock", "PHP lock file", "LOW"),
        ("package.json", "Node.js dependencies", "LOW"),
        ("package-lock.json", "Node.js lock file", "LOW"),
        ("yarn.lock", "Yarn lock file", "LOW"),
        ("Gemfile", "Ruby dependencies", "LOW"),
        ("requirements.txt", "Python dependencies", "LOW"),
        ("Pipfile", "Python Pipenv", "LOW"),
        ("go.mod", "Go modules", "LOW"),
        ("Cargo.toml", "Rust dependencies", "LOW"),
        ("Dockerfile", "Docker config", "MEDIUM"),
        ("docker-compose.yml", "Docker Compose", "MEDIUM"),
        ("docker-compose.yaml", "Docker Compose", "MEDIUM"),
        (".dockerignore", "Docker ignore", "LOW"),
        ("Makefile", "Makefile — build commands", "LOW"),
        ("Vagrantfile", "Vagrant config", "MEDIUM"),
        (".travis.yml", "Travis CI", "MEDIUM"),
        (".gitlab-ci.yml", "GitLab CI", "MEDIUM"),
        ("Jenkinsfile", "Jenkins pipeline", "MEDIUM"),
        (".circleci/config.yml", "CircleCI config", "MEDIUM"),

        ("phpinfo.php", "PHP info", "HIGH"),
        ("info.php", "PHP info", "HIGH"),
        ("test.php", "Test PHP file", "MEDIUM"),
        ("server-status", "Apache status", "HIGH"),
        ("server-info", "Apache info", "HIGH"),
        ("nginx.conf", "Nginx config", "HIGH"),
        ("crossdomain.xml", "Flash crossdomain policy (Flash is EOL)", "LOW"),
        ("clientaccesspolicy.xml", "Silverlight access policy (Silverlight is EOL)", "LOW"),

        ("dump.sql", "SQL dump", "CRITICAL"),
        ("database.sql", "SQL dump", "CRITICAL"),
        ("db.sql", "SQL dump", "CRITICAL"),
        ("backup.sql", "SQL backup", "CRITICAL"),
        ("data.sql", "SQL data", "CRITICAL"),
        ("backup.zip", "ZIP backup", "CRITICAL"),
        ("backup.tar.gz", "TAR backup", "CRITICAL"),
        ("backup.tar", "TAR backup", "CRITICAL"),
        ("site.zip", "Site ZIP archive", "CRITICAL"),
        ("www.zip", "Site ZIP archive", "CRITICAL"),
        ("web.zip", "Site ZIP archive", "CRITICAL"),

        ("admin/", "Admin panel", "MEDIUM"),
        ("admin/login", "Admin login", "MEDIUM"),
        ("administrator/", "Admin panel", "MEDIUM"),
        ("wp-admin/", "WordPress admin", "MEDIUM"),
        ("wp-login.php", "WordPress login", "MEDIUM"),
        ("user/login", "Login page", "MEDIUM"),
        ("login", "Login page", "MEDIUM"),
        ("panel/", "Control panel", "MEDIUM"),
        ("dashboard/", "Dashboard", "MEDIUM"),
        ("cpanel/", "cPanel", "MEDIUM"),
        ("phpmyadmin/", "phpMyAdmin", "HIGH"),
        ("pma/", "phpMyAdmin", "HIGH"),
        ("adminer.php", "Adminer (DB tool)", "HIGH"),
        ("adminer/", "Adminer", "HIGH"),

        ("backup/", "Backup directory", "MEDIUM"),
        ("backups/", "Backup directory", "MEDIUM"),
        ("debug/", "Debug mode", "MEDIUM"),
        ("test/", "Test directory", "LOW"),
        ("temp/", "Temp files", "LOW"),
        ("tmp/", "Temp files", "LOW"),
        ("logs/", "Log directory", "MEDIUM"),
        ("log/", "Log directory", "MEDIUM"),
        ("uploads/", "Upload directory", "LOW"),
        ("private/", "Private directory", "MEDIUM"),
        ("secret/", "Secret directory", "MEDIUM"),
        ("api/", "API endpoint", "INFO"),
        ("api/v1/", "API v1", "INFO"),
        ("api/v2/", "API v2", "INFO"),
        ("graphql", "GraphQL endpoint", "INFO"),
        ("swagger/", "Swagger UI", "MEDIUM"),
        ("swagger.json", "Swagger JSON", "MEDIUM"),
        ("swagger.yaml", "Swagger YAML", "MEDIUM"),
        ("api-docs/", "API documentation", "MEDIUM"),
        ("openapi.json", "OpenAPI spec", "MEDIUM"),
        ("docs/", "Documentation", "LOW"),
        ("ftp/", "FTP directory listing", "MEDIUM"),
        (".well-known/openid-configuration", "OpenID configuration", "INFO"),

        (".DS_Store", "macOS metadata", "MEDIUM"),
        ("Thumbs.db", "Windows metadata", "LOW"),
        ("robots.txt", "robots.txt", "INFO"),
        ("sitemap.xml", "Sitemap", "INFO"),
        (".well-known/security.txt", "security.txt", "INFO"),
        ("humans.txt", "humans.txt", "INFO"),
        ("favicon.ico", "Favicon", "INFO"),
        ("elmah.axd", "ELMAH error log (ASP.NET)", "HIGH"),
        ("trace.axd", "ASP.NET trace", "HIGH"),
        ("error_log", "Error log", "HIGH"),
        ("errors.log", "Error log", "HIGH"),
        ("debug.log", "Debug log", "HIGH"),
        ("access.log", "Access log", "HIGH"),

        ("main.js.map", "JavaScript source map", "MEDIUM"),
        ("app.js.map", "JavaScript source map", "MEDIUM"),
        ("bundle.js.map", "JavaScript source map", "MEDIUM"),
        ("main.css.map", "CSS source map", "MEDIUM"),
    ]

    base = url.rstrip("/")

    # Paths that are expected to be public: reported as INFO only.
    info_paths = {"robots.txt", "sitemap.xml", ".well-known/security.txt", "humans.txt",
                  "favicon.ico", "api/", "api/v1/", "api/v2/", "graphql",
                  ".well-known/openid-configuration"}
    # Dependency manifests and similar: always reported as LOW.
    dep_files = {"composer.json", "composer.lock", "package.json", "package-lock.json",
                 "yarn.lock", "Gemfile", "requirements.txt", "Pipfile", "go.mod", "Cargo.toml",
                 ".gitignore", ".dockerignore", "Makefile"}

    sev_map = {"CRITICAL": Severity.CRITICAL, "HIGH": Severity.HIGH,
               "MEDIUM": Severity.MEDIUM, "LOW": Severity.LOW, "INFO": Severity.INFO}

    # Paths that are too generic — only flag if content differs from soft-404
    generic_paths = {"admin/", "administrator/", "admin/login", "panel/", "dashboard/",
                     "cpanel/", "backup/", "backups/", "debug/", "test/", "temp/",
                     "tmp/", "logs/", "log/", "uploads/", "private/", "secret/",
                     "swagger/", "api-docs/", "docs/", "user/login", "login",
                     "wp-admin/", "wp-login.php", "phpmyadmin/", "pma/"}

    # Loop invariants, built once instead of on every response.
    # Non-HTML files served as HTML = SPA catch-all, not real file
    non_html_exts = (".json", ".yml", ".yaml", ".xml", ".conf",
                     ".env", ".sql", ".lock", ".toml", ".mod",
                     ".php", ".py", ".rb", ".map")
    deny_words = ("forbidden", "denied", "not found", "404",
                  "no permission", "access denied", "login required",
                  "please log in", "you must", "unauthorized",
                  "you need to", "sign in", "log in to")

    # Fetch a fixed, presumably non-existent canary path to learn what a
    # soft-404 (HTTP 200 "not found" page) looks like on this site.
    soft404_hash = None
    soft404_size = 0
    try:
        canary = session.get(f"{base}/qxz_nonexistent_8471/", timeout=8, allow_redirects=False)
        if canary.status_code == 200 and len(canary.content) > 500:
            soft404_hash = hashlib.sha256(canary.content).hexdigest()
            soft404_size = len(canary.content)
    except requests.RequestException:
        # Best effort: without a baseline the soft-404 filter is simply off.
        pass

    def fetch(entry):
        """Request one table entry; return the response or None on failure."""
        try:
            return session.get(f"{base}/{entry[0]}", timeout=8, allow_redirects=False)
        except requests.RequestException:
            return None

    def is_soft_404(resp):
        """Return True when resp looks like the recorded soft-404 page."""
        if not soft404_hash:
            return False
        body = resp.content
        if hashlib.sha256(body).hexdigest() == soft404_hash:
            return True
        # soft404_size is always > 500 here (it is only recorded for such pages),
        # so only the candidate's size needs checking before the ratio test.
        if len(body) > 500 and "text/html" in resp.headers.get("Content-Type", ""):
            return abs(len(body) - soft404_size) / soft404_size < 0.05
        return False

    with ThreadPoolExecutor(max_workers=15) as pool:
        # pool.map yields results in input order, which keeps the order of
        # the findings stable between runs while requests still run in parallel.
        for (path, desc, sev_str), resp in zip(sensitive, pool.map(fetch, sensitive)):
            if resp is None:
                continue
            status = resp.status_code
            size = len(resp.content)

            if status == 403:
                result.add(Finding("Files", f"{path} — 403 Forbidden", Severity.INFO,
                    f"Path /{path} exists but access is denied.",
                    confidence=Confidence.CONFIRMED))
                continue
            if status != 200 or size == 0:
                continue

            if _is_spa_response(resp, result) or is_soft_404(resp):
                continue

            ct = resp.headers.get("Content-Type", "").lower()
            is_html = "text/html" in ct

            if is_html and path.endswith(non_html_exts):
                continue

            # Generic paths need actual sensitive content, not just a 200
            if path in generic_paths and is_html and size > 500:
                body_lower = resp.text[:5000].lower()
                if any(word in body_lower for word in deny_words):
                    continue
                # A body about the size of the SPA shell is treated as the shell itself.
                if result.spa_size > 0 and abs(size - result.spa_size) / max(result.spa_size, 1) < 0.15:
                    continue

            evidence = f"HTTP {status} | Content-Type: {ct} | Size: {size} bytes"
            if path in info_paths:
                result.add(Finding("Files", f"Found {path}", Severity.INFO, f"{desc}.",
                    evidence=evidence,
                    confidence=Confidence.CONFIRMED))
            elif path in dep_files:
                result.add(Finding("Dependency Files", f"Accessible: {path}", Severity.LOW,
                    f"{desc} — exposes project dependencies ({size} bytes).",
                    f"Restrict access to /{path} if not needed publicly.",
                    evidence=evidence,
                    confidence=Confidence.FIRM))
            else:
                sev = sev_map.get(sev_str, Severity.MEDIUM)
                result.add(Finding("Sensitive Files", f"Accessible: {path}", sev,
                    f"{desc}. File is publicly accessible ({size} bytes)!",
                    f"Immediately restrict access to /{path}.",
                    evidence=evidence,
                    confidence=Confidence.FIRM))




def check_cors(session: requests.Session, url: str, result: ScanResult):
    """Probe the CORS policy by sending requests with hostile Origin values.

    Three origins are tried: an arbitrary foreign domain, the literal
    ``null`` origin, and (when the URL has a hostname) a look-alike domain
    that embeds the target's hostname. A wildcard
    ``Access-Control-Allow-Origin`` ends the probing immediately; a reflected
    or ``null`` origin is rated HIGH when credentials are also allowed and
    MEDIUM otherwise. Failed requests are skipped.
    """
    probes = [
        ("https://evil.example.com", "arbitrary domain"),
        ("null", "null origin"),
    ]
    target_host = urllib.parse.urlparse(url).hostname
    if target_host:
        probes.append((f"https://{target_host}.evil.com", "subdomain spoofing"))

    for origin, desc in probes:
        try:
            resp = session.get(url, headers={"Origin": origin}, timeout=10)
        except requests.RequestException:
            continue

        acao = resp.headers.get("Access-Control-Allow-Origin", "")
        acac = resp.headers.get("Access-Control-Allow-Credentials", "").lower()
        with_credentials = acac == "true"

        if acao == "*":
            result.add(Finding("CORS", "Access-Control-Allow-Origin: *", Severity.MEDIUM,
                "Server allows requests from any domain.",
                "Restrict CORS to trusted domains only.",
                evidence=f"Access-Control-Allow-Origin: {acao}",
                confidence=Confidence.CONFIRMED))
            # A wildcard answers every origin the same way; no need to go on.
            break

        severity = Severity.HIGH if with_credentials else Severity.MEDIUM

        if origin != "null" and origin in acao:
            tail = " with Allow-Credentials!" if with_credentials else "."
            result.add(Finding("CORS", f"CORS reflects {desc}", severity,
                f"Server reflects Origin '{origin}'" + tail,
                "Configure an Origin whitelist.",
                evidence=f"Origin: {origin} -> ACAO: {acao} | ACAC: {acac}",
                confidence=Confidence.FIRM))
        elif origin == "null" and acao == "null":
            creds_note = " with credentials!" if with_credentials else ""
            result.add(Finding("CORS", "CORS accepts null Origin", severity,
                "Server allows Origin: null" + creds_note + ". Bypassable via iframe sandbox.",
                "Do not allow null in ACAO.",
                evidence=f"Origin: null -> ACAO: {acao} | ACAC: {acac}",
                confidence=Confidence.FIRM))




def check_open_ports(url: str, result: "ScanResult"):
    """Try a TCP connect to a fixed list of common service ports.

    Each port on the URL's host is probed with a 2-second connect timeout,
    up to 30 probes in parallel. Open ports are reported in ascending order:
    ports in the risky set as HIGH, 80/443 as INFO, and every other open
    port as LOW.

    Args:
        url: Target URL; only its hostname is used.
        result: Scan result that receives the findings.

    Returns:
        None. Nothing is scanned or reported when ``url`` has no hostname
        (for example a bare ``example.com`` without a scheme).
    """
    host = urllib.parse.urlparse(url).hostname
    if not host:
        # socket.create_connection((None, port)) resolves to the loopback
        # interface, so scanning here would report the scanner machine's own
        # ports as if they belonged to the target.
        return

    ports = {
        21: "FTP", 22: "SSH", 23: "Telnet", 25: "SMTP", 53: "DNS",
        80: "HTTP", 110: "POP3", 111: "RPCbind", 135: "MSRPC",
        139: "NetBIOS", 143: "IMAP", 389: "LDAP", 443: "HTTPS",
        445: "SMB", 465: "SMTPS", 587: "SMTP-Submission",
        636: "LDAPS", 993: "IMAPS", 995: "POP3S",
        1080: "SOCKS", 1433: "MSSQL", 1521: "Oracle",
        2049: "NFS", 2375: "Docker-API", 2376: "Docker-TLS",
        3000: "Grafana/Dev", 3306: "MySQL", 3389: "RDP",
        4443: "HTTPS-alt", 5000: "Flask/Docker-Registry",
        5432: "PostgreSQL", 5601: "Kibana", 5900: "VNC",
        5984: "CouchDB", 6379: "Redis", 6443: "Kubernetes-API",
        7001: "WebLogic", 8000: "HTTP-alt", 8008: "HTTP-alt",
        8080: "HTTP-proxy", 8081: "HTTP-alt", 8443: "HTTPS-alt",
        8888: "HTTP-alt", 9000: "PHP-FPM/SonarQube",
        9090: "Prometheus", 9200: "Elasticsearch", 9300: "Elasticsearch-transport",
        9418: "Git", 11211: "Memcached", 15672: "RabbitMQ-Mgmt",
        27017: "MongoDB", 27018: "MongoDB-shard",
        28017: "MongoDB-HTTP", 50000: "Jenkins-agent",
    }

    # Open ports in this set are reported as HIGH.
    risky_ports = {
        21, 23, 25, 111, 135, 139, 389, 445, 1080, 1433, 1521,
        2049, 2375, 2376, 3306, 3389, 5432, 5900, 5984, 6379, 6443,
        7001, 9200, 9300, 9418, 11211, 15672, 27017, 27018, 28017, 50000,
    }

    def scan_port(port_info):
        """Return (port, name) if the port accepts a connection, else None."""
        port, name = port_info
        try:
            with socket.create_connection((host, port), timeout=2):
                return (port, name)
        except OSError:
            # Covers timeouts, refused connections and resolution failures:
            # socket.timeout and ConnectionRefusedError are OSError subclasses.
            return None

    with ThreadPoolExecutor(max_workers=30) as pool:
        open_ports = sorted(hit for hit in pool.map(scan_port, ports.items()) if hit)

    for port, name in open_ports:
        if port in risky_ports:
            result.add(Finding("Ports", f"Open port {port} ({name})", Severity.HIGH,
                f"Port {port} ({name}) is exposed to the internet.",
                f"Block port {port} with a firewall or restrict by IP.",
                confidence=Confidence.CONFIRMED))
        elif port not in (80, 443):
            result.add(Finding("Ports", f"Open port {port} ({name})", Severity.LOW,
                f"Port {port} ({name}) is open.",
                "Verify this port needs to be exposed.",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("Ports", f"Port {port} ({name}) open", Severity.INFO,
                f"Standard web port {port}.",
                confidence=Confidence.CONFIRMED))




def check_methods(session: requests.Session, url: str, result: ScanResult):
    """Probe which risky HTTP methods the server accepts.

    Runs three steps in order:

    1. Send each potentially dangerous method and keep those whose status is
       not one of the usual rejections and whose response is not flagged by
       ``_is_spa_response``. If three or more of them share one status code
       the whole set is discarded as catch-all behaviour. What remains is
       reported as WebDAV (HIGH) and/or other allowed methods (MEDIUM).
    2. Send TRACE again and report XST when it answers 200 with "TRACE" in
       the body.
    3. Send OPTIONS and report risky methods listed in the Allow header.
    """
    webdav_methods = {"PROPFIND", "MKCOL", "COPY", "MOVE"}
    rejected_statuses = (405, 501, 404, 400, 403, 418)

    accepted = []
    for method in ("PUT", "DELETE", "TRACE", "CONNECT", "PATCH",
                   "PROPFIND", "MKCOL", "COPY", "MOVE"):
        try:
            resp = session.request(method, url, timeout=8)
            # SPA catch-all returns same HTML for any method — not a real finding
            if resp.status_code in rejected_statuses or _is_spa_response(resp, result):
                continue
            accepted.append((method, resp.status_code))
        except requests.RequestException:
            pass

    # If most methods return same status, likely SPA/CDN — not real method support
    if len(accepted) >= 3:
        statuses = [code for _, code in accepted]
        if max(statuses.count(code) for code in set(statuses)) >= 3:
            accepted = []

    webdav_hits = [(m, c) for m, c in accepted if m in webdav_methods]
    other_hits = [(m, c) for m, c in accepted if m not in webdav_methods]

    if webdav_hits:
        webdav_names = ", ".join(m for m, _ in webdav_hits)
        webdav_evidence = ", ".join(f"{m} ({c})" for m, c in webdav_hits)
        result.add(Finding("HTTP Methods", f"WebDAV enabled: {webdav_names}", Severity.HIGH,
            f"Server accepts WebDAV methods: {webdav_names}.",
            "Disable WebDAV if not needed.",
            evidence=f"Methods: {webdav_evidence}",
            confidence=Confidence.CONFIRMED))

    if other_hits:
        methods_str = ", ".join(f"{m} ({c})" for m, c in other_hits)
        result.add(Finding("HTTP Methods", f"Allowed: {methods_str}", Severity.MEDIUM,
            f"Server accepts: {methods_str}.",
            "Disable unnecessary HTTP methods.",
            evidence=f"Methods: {methods_str}",
            confidence=Confidence.CONFIRMED))

    # TRACE (XST)
    try:
        trace_resp = session.request("TRACE", url, timeout=8)
        if trace_resp.status_code == 200 and "TRACE" in trace_resp.text:
            result.add(Finding("HTTP Methods", "TRACE enabled (XST)", Severity.MEDIUM,
                "TRACE allows Cross-Site Tracing attack.",
                "Disable the TRACE method.",
                evidence=f"TRACE HTTP {trace_resp.status_code} | Body: {trace_resp.text[:100]}",
                confidence=Confidence.CONFIRMED))
    except requests.RequestException:
        pass

    # OPTIONS — check what server advertises
    advertised_risky = {"PUT", "DELETE", "TRACE", "CONNECT",
                        "PROPFIND", "MKCOL", "COPY", "MOVE"}
    try:
        allow = session.request("OPTIONS", url, timeout=8).headers.get("Allow", "")
        risky = [
            token
            for token in (part.strip().upper() for part in allow.split(","))
            if token in advertised_risky
        ] if allow else []
        if risky:
            result.add(Finding("HTTP Methods",
                f"OPTIONS advertises risky methods: {', '.join(risky)}", Severity.LOW,
                f"Server Allow header lists: {allow}.",
                "Remove unnecessary methods from Allow header.",
                evidence=f"Allow: {allow}",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass




def check_info_disclosure(session: requests.Session, url: str, result: ScanResult):
    """Look for information leaks in error responses.

    Two kinds of probe are made:

    1. A request for a path that should not exist. Unless
       ``_is_spa_response`` flags the reply, its body is searched for
       technology / error signatures and for server filesystem paths. The
       status code is then used to note when missing pages answer 200.
    2. Requests for a handful of malformed URLs. The first one that yields
       HTTP 500 with debug-looking keywords in the body produces a finding.

    Failed requests are skipped.

    Args:
        session: HTTP session used for every probe.
        url: Base URL of the target; a trailing slash is ignored.
        result: Scan result that receives the findings.
    """
    base = url.rstrip("/")

    try:
        resp = session.get(f"{base}/nonexistent_path_qscan_12345", timeout=10)
        is_spa = _is_spa_response(resp, result)

        if not is_spa:
            body = resp.text.lower()
            # lower-cased needle -> label shown in the finding
            signatures = {
                "apache/": "Apache (version)", "nginx/": "nginx (version)",
                "microsoft-iis/": "IIS (version)", "tomcat/": "Tomcat (version)",
                "php/": "PHP (version)", "python/": "Python (version)",
                "django": "Django", "flask": "Flask", "laravel": "Laravel",
                "express": "Express", "rails": "Rails", "spring": "Spring",
                "asp.net": "ASP.NET", "next.js": "Next.js", "nuxt": "Nuxt.js",
                "stack trace": "Stack trace", "traceback (most recent": "Python traceback",
                "fatal error": "Fatal error", "unhandled exception": "Unhandled exception",
                "syntax error": "Syntax error", "runtime error": "Runtime error",
                "at /": "Server path", "c:\\": "Windows path",
                "debug = true": "Debug mode", "debug mode": "Debug mode",
                "mysql_": "MySQL function", "pg_query": "PostgreSQL function",
                "sqlite3": "SQLite3",
            }
            # Several needles share a label ("Debug mode"), so de-duplicate.
            # dict.fromkeys keeps first-seen order; a set would order the
            # labels by randomized string hashes and change the report text
            # from one run to the next.
            disclosed = list(dict.fromkeys(
                name for key, name in signatures.items() if key in body))
            if disclosed:
                disclosed_str = ", ".join(disclosed)
                result.add(Finding("Info Disclosure", "Error page leaks technology info", Severity.MEDIUM,
                    f"404 page contains: {disclosed_str}.",
                    "Configure custom error pages.",
                    evidence=f"Detected: {disclosed_str} | HTTP {resp.status_code}",
                    confidence=Confidence.FIRM))

            # Stack trace / path disclosure
            path_patterns = [
                r'/home/\w+/', r'/var/www/', r'/usr/share/', r'/opt/',
                r'[A-Z]:\\[\w\\]+', r'/app/', r'/srv/',
            ]
            for pattern in path_patterns:
                match = re.search(pattern, resp.text)
                if match:
                    result.add(Finding("Info Disclosure", f"Path disclosure: {match.group()}", Severity.MEDIUM,
                        f"Server path found in response: {match.group()}.",
                        "Remove server paths from error pages.",
                        confidence=Confidence.FIRM))
                    # One path finding per page is enough.
                    break

        if resp.status_code == 200 and not is_spa:
            result.add(Finding("Info Disclosure", "404 returns 200", Severity.INFO,
                "Non-existent pages return 200 (custom 404 or SPA).",
                "",
                confidence=Confidence.CONFIRMED))
        elif resp.status_code == 200 and is_spa:
            result.add(Finding("Info Disclosure", "SPA: all routes return 200 + index.html", Severity.INFO,
                "SPA detected — all routes return the same HTML page.",
                "Normal SPA behavior. Client-side routing.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        # Best effort: an unreachable 404 probe must not stop the 500 probes.
        pass

    # Malformed URLs intended to provoke a server-side error.
    error_urls = [
        f"{base}/%00", f"{base}/'" , f"{base}/<>",
        f"{base}/{{{{}}}}",  f"{base}/../../../etc/passwd",
    ]
    debug_keywords = ("traceback", "stack trace", "exception",
                      "error in", "fatal", "debug")
    for err_url in error_urls:
        try:
            resp = session.get(err_url, timeout=8)
            if resp.status_code == 500:
                body = resp.text.lower()
                if any(kw in body for kw in debug_keywords):
                    result.add(Finding("Info Disclosure", "500 reveals debug info", Severity.HIGH,
                        f"URL {err_url} triggers 500 with debug info.",
                        "Disable debug mode and configure custom error pages.",
                        evidence=f"HTTP 500 | URL: {err_url} | Body snippet: {resp.text[:100]}",
                        confidence=Confidence.FIRM))
                    # Report only the first URL that leaks debug output.
                    break
        except requests.RequestException:
            pass




def check_injection_points(session: requests.Session, url: str, result: ScanResult):
    """Run lightweight probes for reflected XSS, SQL injection, SSTI and path traversal.

    Each family sends its payloads one at a time and stops at the first
    payload that produces a finding. Responses flagged by
    ``_is_spa_response`` and failed requests are skipped. The SQL and SSTI
    probes first fetch a benign baseline so that markers already present on
    the normal page are not counted. All findings are marked TENTATIVE.
    """
    base = url.rstrip("/")

    def fetch(target):
        """Return the response for target, or None on request error / SPA page."""
        try:
            reply = session.get(target, timeout=8)
            if _is_spa_response(reply, result):
                return None
            return reply
        except requests.RequestException:
            return None

    # --- Reflected XSS: payload echoed back verbatim via ?q= ---
    xss_payloads = (
        ("<script>alert(1)</script>", "script tag"),
        ("<img src=x onerror=alert(1)>", "img onerror"),
        ("'\"><svg/onload=alert(1)>", "svg onload"),
        ("javascript:alert(1)", "javascript: URI"),
    )
    for payload, name in xss_payloads:
        resp = fetch(f"{base}/?q={urllib.parse.quote(payload)}")
        if resp is None:
            continue
        if payload in resp.text:
            result.add(Finding("XSS", f"Reflected XSS ({name})", Severity.HIGH,
                f"Payload '{payload}' is reflected without encoding.",
                "Encode all user input.",
                confidence=Confidence.TENTATIVE))
            break

    # --- SQL injection: database error markers triggered via ?id= ---
    sqli_payloads = (
        ("'", "single quote"),
        ("' OR '1'='1", "OR 1=1"),
        ("1 UNION SELECT NULL--", "UNION SELECT"),
        ("1' AND SLEEP(0)--", "blind SQLi"),
        ("1; SELECT 1--", "stacked queries"),
    )
    sql_errors = [
        "sql syntax", "mariadb", "sqlite", "postgresql", "ora-",
        "sql error", "unclosed quotation", "database error",
        "query failed", "odbc", "sqlstate", "pg_query", "mysql_fetch",
        "you have an error in your sql", "warning: mysql", "unterminated",
        "microsoft ole db", "jet database engine",
    ]
    # Baseline: fetch page with a benign parameter
    try:
        benign_body = session.get(f"{base}/?id=1", timeout=8).text.lower()
    except requests.RequestException:
        benign_body = ""
    preexisting = {marker for marker in sql_errors if marker in benign_body}

    for payload, name in sqli_payloads:
        resp = fetch(f"{base}/?id={urllib.parse.quote(payload)}")
        if resp is None:
            continue
        body = resp.text.lower()
        # Only errors absent from baseline
        new_errors = [marker for marker in sql_errors
                      if marker in body and marker not in preexisting]
        if new_errors:
            result.add(Finding("SQL Injection", f"SQL injection ({name})", Severity.CRITICAL,
                f"Parameter ?id={payload} triggers errors: {', '.join(new_errors[:3])}.",
                "Use parameterized queries.",
                confidence=Confidence.TENTATIVE))
            break

    # --- SSTI: the arithmetic result appears instead of the raw template ---
    ssti_payloads = (
        ("{{7*7}}", "49"),
        ("${7*7}", "49"),
        ("<%= 7*7 %>", "49"),
        ("#{7*7}", "49"),
    )
    # Baseline: check if "49" appears in normal response
    try:
        baseline_has_49 = "49" in session.get(f"{base}/?q=test_safe_string", timeout=8).text
    except requests.RequestException:
        baseline_has_49 = False

    for payload, expected in ssti_payloads:
        resp = fetch(f"{base}/?q={urllib.parse.quote(payload)}")
        if resp is None:
            continue
        text = resp.text
        # expected must not appear in baseline (otherwise it's normal content)
        evaluated = expected in text and payload not in text
        if evaluated and not baseline_has_49:
            result.add(Finding("SSTI", f"Server-Side Template Injection ({payload})", Severity.CRITICAL,
                f"Template {payload} was evaluated (result: {expected}).",
                "Do not pass user input to template engines without sanitization.",
                confidence=Confidence.TENTATIVE))
            break

    # --- Path traversal: well-known system file contents in the body ---
    traversal_payloads = (
        ("....//....//....//etc/passwd", "/etc/passwd"),
        ("..%2f..%2f..%2fetc/passwd", "/etc/passwd encoded"),
        ("..\\..\\..\\windows\\win.ini", "windows win.ini"),
        ("....//....//....//windows/win.ini", "windows"),
    )
    for payload, name in traversal_payloads:
        resp = fetch(f"{base}/{payload}")
        if resp is None:
            continue
        text = resp.text
        looks_like_passwd = "root:" in text and "/bin" in text
        looks_like_win_ini = "[fonts]" in text and "[extensions]" in text
        if looks_like_passwd or looks_like_win_ini:
            result.add(Finding("Path Traversal", f"Path traversal ({name})", Severity.CRITICAL,
                f"Payload {payload} allows reading system files.",
                "Validate paths and reject ../ in user input.",
                confidence=Confidence.TENTATIVE))
            break




def check_crlf(session: requests.Session, url: str, result: ScanResult):
    """Probe for CRLF injection into response headers.

    Sends each payload in the ``redirect`` query parameter (redirects are not
    followed) and reports a single TENTATIVE finding as soon as a response
    carries an ``X-Injected`` header. Failed requests are skipped.
    """
    base = url.rstrip("/")
    crlf_payloads = (
        "%0d%0aX-Injected: qscan",
        "%0d%0a%0d%0a<script>alert(1)</script>",
        "%E5%98%8A%E5%98%8DX-Injected: qscan",  # Unicode CRLF
    )
    for payload in crlf_payloads:
        try:
            resp = session.get(f"{base}/?redirect={payload}", timeout=8, allow_redirects=False)
        except requests.RequestException:
            continue
        # Header names are compared case-insensitively.
        if any(header.lower() == "x-injected" for header in resp.headers):
            result.add(Finding("CRLF Injection", "CRLF injection in headers", Severity.HIGH,
                "User input is injected into HTTP headers without filtering.",
                "Filter \\r\\n in user input when constructing headers.",
                confidence=Confidence.TENTATIVE))
            return




def check_host_header(session: requests.Session, url: str, result: ScanResult):
    """Probe for Host-header injection and URL-override routing bypass.

    Two families of probes are sent:

    * ``Host`` / ``X-Forwarded-Host`` set to a marker host, looking for the
      marker in a redirect ``Location`` or in the response body.
    * ``X-Original-URL`` / ``X-Rewrite-URL`` set to ``/admin``, looking for a
      200 response that differs noticeably from the normal page.

    Every probe is best-effort: a network error skips that probe only.

    Args:
        session: HTTP session used to send the probes.
        url: Target URL.
        result: Scan result that receives any findings.
    """
    marker = "evil.qscan.test"
    for header_name in ("Host", "X-Forwarded-Host"):
        try:
            _host_header_probe_reflection(session, url, header_name, marker, result)
        except requests.RequestException:
            continue  # best-effort probe; an unreachable target is not a finding

    # The override probe is meaningless when the scanned URL already targets /admin.
    if "/admin" in url:
        return
    for header_name in ("X-Original-URL", "X-Rewrite-URL"):
        try:
            _host_header_probe_url_override(session, url, header_name, result)
        except requests.RequestException:
            # Covers the probe itself and the baseline requests: without a
            # baseline the response cannot be judged, so nothing is reported.
            continue


def _host_header_probe_reflection(session, url, header_name, marker, result):
    """Send *header_name* set to *marker* and report where the marker comes back."""
    spoofed = {header_name: marker}
    # Redirects must not be followed here: a poisoned Location points at the
    # marker host (reserved .test domain), so following it would only raise a
    # connection error and hide the very behaviour being tested.
    first = session.get(url, headers=spoofed, timeout=8, allow_redirects=False)
    if marker in first.headers.get("Location", ""):
        result.add(Finding("Host Header Injection",
            f"{header_name} in Location redirect", Severity.HIGH,
            f"{header_name} header injected into Location. Open redirect.",
            "Validate the Host header on the server.",
            confidence=Confidence.TENTATIVE))
        return

    final = first
    if first.is_redirect:
        # Benign redirect (e.g. to a canonical host): inspect the page it leads to.
        final = session.get(url, headers=spoofed, timeout=8)
    if marker in final.text:
        result.add(Finding("Host Header Injection",
            f"{header_name} reflected in response", Severity.HIGH,
            f"{header_name}: {marker} reflected in response body. "
            "Cache poisoning and password reset hijacking possible.",
            f"Do not trust {header_name}. Use a host whitelist.",
            confidence=Confidence.TENTATIVE))


def _host_header_probe_url_override(session, url, header_name, result):
    """Send *header_name*: /admin and report a response that deviates from the normal page."""
    resp = session.get(url, headers={header_name: "/admin"}, timeout=8)
    if resp.status_code != 200:
        return
    if _is_spa_response(resp, result):
        return

    # Two plain requests measure how much the page size jitters on its own.
    normal1 = session.get(url, timeout=8)
    normal2 = session.get(url, timeout=8)
    n1_len = max(len(normal1.content), 1)
    n2_len = max(len(normal2.content), 1)
    baseline_var = abs(n1_len - n2_len) / max(n1_len, n2_len)

    # Only flag when the probed response differs by more than the natural
    # jitter plus a margin, and by at least 20 %.
    resp_diff = abs(len(resp.content) - n1_len) / n1_len
    threshold = max(0.20, baseline_var * 2 + 0.15)
    if resp_diff < threshold:
        return
    if "text/html" in resp.headers.get("Content-Type", ""):
        return  # HTML response to /admin header = SPA/catch-all, not real bypass

    result.add(Finding("Host Header Injection",
        f"{header_name} bypasses routing", Severity.HIGH,
        f"{header_name} header may bypass access controls.",
        f"Block {header_name} header at the proxy level.",
        confidence=Confidence.TENTATIVE))




def check_open_redirect(session: requests.Session, url: str, result: ScanResult):
    """Probe common redirect parameters for an open redirect.

    Each parameter is set to an external marker URL. A finding is recorded
    (and the probe stops) as soon as a 3xx response's ``Location`` header
    resolves to the marker host, either as an absolute URL or as a
    protocol-relative ``//host/...`` reference.

    Args:
        session: HTTP session used to send the probes.
        url: Target URL; a trailing slash is stripped before the query is appended.
        result: Scan result that receives the finding, if any.
    """
    base = url.rstrip("/")
    params = ["url", "redirect", "next", "return", "returnUrl", "return_to",
              "redirect_uri", "redirect_url", "go", "dest", "destination",
              "rurl", "target", "view", "link", "forward", "continue"]
    evil_host = "evil.qscan.test"
    evil = "https://evil.qscan.test/pwned"

    for param in params:
        try:
            resp = session.get(f"{base}/?{param}={urllib.parse.quote(evil)}",
                             timeout=8, allow_redirects=False)
        except requests.RequestException:
            continue
        if resp.status_code not in (301, 302, 303, 307, 308):
            continue

        location = resp.headers.get("Location", "")
        try:
            # urlparse also resolves the host of protocol-relative targets
            # ("//host/path"), so one hostname check covers both forms.
            target_host = urllib.parse.urlparse(location).hostname or ""
        except ValueError:
            # Malformed Location (e.g. unbalanced IPv6 brackets): not a usable redirect.
            continue

        # Compare the actual redirect host, not a substring: the marker showing
        # up in userinfo, a query parameter or as a prefix label of another
        # domain is not an open redirect.
        if target_host == evil_host or target_host.endswith("." + evil_host):
            result.add(Finding("Open Redirect",
                f"Open redirect via ?{param}=", Severity.MEDIUM,
                f"Parameter {param} redirects to external URL: {location}.",
                "Validate redirect URLs — allow only relative paths or a domain whitelist.",
                confidence=Confidence.TENTATIVE))
            return




def check_directory_listing(session: requests.Session, url: str, result: ScanResult):
    """Look for auto-generated directory listings under common directories.

    Each candidate directory is requested with a trailing slash. A finding is
    recorded for every directory whose successful (200) response body contains
    one of the known listing markers.

    Args:
        session: HTTP session used to send the requests.
        url: Target URL; a trailing slash is stripped before paths are appended.
        result: Scan result that receives any findings.
    """
    base = url.rstrip("/")
    dirs = ["", "/images", "/static", "/uploads", "/media", "/assets",
            "/css", "/js", "/files", "/data", "/docs", "/img",
            "/content", "/public", "/storage", "/include", "/includes"]

    markers = ["index of /", "directory listing", "<title>index of",
               "parent directory", "[to parent directory]",
               "directory listing for", "name</a>", "last modified</a>"]

    for d in dirs:
        try:
            resp = session.get(f"{base}{d}/", timeout=8)
            if _is_spa_response(resp, result):
                continue
            # Error pages such as "Directory Listing Denied" contain the same
            # phrases as a real listing; only a successful response counts.
            if resp.status_code != 200:
                continue
            body = resp.text.lower()
            if any(m in body for m in markers):
                result.add(Finding("Directory Listing", f"Directory listing: {d or '/'}", Severity.MEDIUM,
                    f"Directory {d or '/'} exposes file listing.",
                    "Disable directory listing (Options -Indexes / autoindex off).",
                    confidence=Confidence.FIRM))
        except requests.RequestException:
            continue  # best-effort: an unreachable directory is simply skipped




def check_security_txt(session: requests.Session, url: str, result: ScanResult):
    """Report whether the site publishes a security.txt file.

    Both the ``/.well-known/`` location and the site root are tried. A file
    counts as present when the response is 200 and its body contains a
    ``Contact:`` field (case-insensitive). If neither location qualifies, a
    "missing" finding is recorded instead.
    """
    root = url.rstrip("/")
    candidates = ("/.well-known/security.txt", "/security.txt")

    for location in candidates:
        try:
            reply = session.get(root + location, timeout=8)
        except requests.RequestException:
            continue
        if reply.status_code != 200:
            continue
        if "contact:" not in reply.text.lower():
            continue
        result.add(Finding("Best Practices", "security.txt present", Severity.INFO,
            f"security.txt found at {location}.",
            confidence=Confidence.CONFIRMED))
        return

    # Neither location served a usable file.
    result.add(Finding("Best Practices", "Missing security.txt", Severity.LOW,
        "security.txt file is missing.",
        "Create .well-known/security.txt (RFC 9116).",
        confidence=Confidence.CONFIRMED))


def check_privacy_policy(session: requests.Session, url: str, result: ScanResult):
    """Report whether the site appears to have a privacy policy.

    The homepage is searched first for privacy-policy wording (English and
    Russian). Failing that, a handful of conventional paths are requested and
    the first one that returns a real page (200, not a catch-all response,
    more than 500 bytes) counts. If nothing is found, a LOW finding is added.
    """
    root = url.rstrip("/")
    homepage_hints = ("privacy policy", "privacy-policy", "privacypolicy",
                      "политика конфиденциальности", "политике конфиденциальности")
    candidate_paths = (
        "/privacy", "/privacy-policy", "/privacy.html",
        "/legal/privacy", "/policy/privacy", "/privacypolicy",
    )

    # Step 1: wording on the homepage itself.
    try:
        home_text = session.get(root, timeout=10).text.lower()
        mentioned = any(hint in home_text for hint in homepage_hints)
    except requests.RequestException:
        mentioned = False
    if mentioned:
        result.add(Finding("Best Practices", "Privacy Policy linked on page",
            Severity.INFO, "Privacy policy page is linked from the homepage.",
            confidence=Confidence.FIRM))
        return

    # Step 2: conventional policy URLs.
    for candidate in candidate_paths:
        try:
            page = session.get(f"{root}{candidate}", timeout=5, allow_redirects=True)
            exists = (page.status_code == 200
                      and not _is_spa_response(page, result)
                      and len(page.content) > 500)
        except requests.RequestException:
            continue
        if exists:
            result.add(Finding("Best Practices", f"Privacy Policy found at {candidate}",
                Severity.INFO, f"Privacy policy page exists at {candidate}.",
                confidence=Confidence.FIRM))
            return

    result.add(Finding("Best Practices", "No Privacy Policy found", Severity.LOW,
        "No privacy policy page found. May be required by GDPR/CCPA if processing personal data.",
        "Add a privacy policy page and link it from the homepage.",
        confidence=Confidence.FIRM))



def check_html_content(session: requests.Session, url: str, result: ScanResult):
    """Fetch *url* and run static, regex-based heuristics over the HTML.

    Checks performed, in order: sensitive keywords in HTML comments, POST/PUT
    forms without a CSRF token, password inputs that allow autocomplete,
    ``http://`` references on an HTTPS page, external scripts without SRI,
    e-mail addresses, ``<meta name="generator">``, inline event handlers,
    source-map references, ``<base>`` tags and ``target=_blank`` links without
    ``rel=noopener``.

    Args:
        session: HTTP session used to fetch the page.
        url: Page to analyse.
        result: Scan result that receives findings; a fetch failure is
            appended to ``result.errors`` and ends the check.
    """
    try:
        resp = session.get(url, timeout=15, allow_redirects=True)
    except requests.RequestException as e:
        result.errors.append(f"HTML analysis error: {e}")
        return

    body = resp.text

    # --- HTML comments mentioning sensitive keywords ---
    comments = re.findall(r'<!--(.*?)-->', body, re.DOTALL)
    sensitive_keywords = [
        "password", "passwd", "secret", "token", "api_key", "apikey",
        "todo", "fixme", "hack", "bug", "debug", "admin", "root",
        "database", "db_", "mysql", "postgres", "internal",
        "deprecated", "remove", "temporary", "temp ",
    ]
    for comment in comments:
        comment_lower = comment.lower().strip()
        if len(comment_lower) < 3:
            continue
        found_kw = [kw for kw in sensitive_keywords if kw in comment_lower]
        if found_kw:
            snippet = comment.strip()[:100]
            result.add(Finding("HTML Analysis", "Comment with sensitive keywords", Severity.LOW,
                f"HTML comment contains: {', '.join(found_kw)}. Snippet: {snippet}",
                "Remove sensitive comments from production code.",
                confidence=Confidence.FIRM))

    # --- State-changing forms without a CSRF token ---
    # The opening tag is captured separately: the method attribute lives there,
    # while hidden token inputs live in the form's inner HTML.
    forms = re.findall(r'(<form\b[^>]*>)(.*?)</form>', body, re.DOTALL | re.IGNORECASE)
    csrf_names = {"csrf", "csrfmiddlewaretoken", "_token", "csrf_token",
                  "authenticity_token", "_csrf", "xsrf", "__requestverificationtoken",
                  "antiforgery"}
    for i, (open_tag, inner_html) in enumerate(forms):
        # The lookbehind keeps attributes such as data-method from matching.
        method = re.search(r'(?<![\w-])method\s*=\s*["\']?\s*([a-z]+)', open_tag, re.IGNORECASE)
        if not method or method.group(1).lower() not in ("post", "put"):
            continue
        form_lower = inner_html.lower()
        has_csrf = any(name in form_lower for name in csrf_names)
        if not has_csrf:
            result.add(Finding("CSRF", f"Form #{i+1} without CSRF token", Severity.MEDIUM,
                "POST/PUT form has no CSRF token.",
                "Add a CSRF token to forms for CSRF protection.",
                confidence=Confidence.FIRM))

    # --- Password inputs that permit autocomplete (reported once) ---
    password_inputs = re.findall(r'<input[^>]*type=["\']password["\'][^>]*>', body, re.IGNORECASE)
    for inp in password_inputs:
        inp_lower = inp.lower()
        explicitly_on = re.search(r'autocomplete\s*=\s*["\']?on\b', inp_lower)
        if 'autocomplete' not in inp_lower or explicitly_on:
            result.add(Finding("HTML Analysis", "Password field with autocomplete=on", Severity.LOW,
                "Browser may save entered passwords.",
                'Add autocomplete="off" or autocomplete="new-password" to password fields.',
                confidence=Confidence.FIRM))
            break  # one finding is enough; keep scanning only until the first offender

    # --- http:// references on an HTTPS page ---
    # NOTE(review): the pattern has no tag context, so plain <a href="http://...">
    # navigation links are counted as well — confirm whether that is intended.
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme == "https":
        http_resources = re.findall(
            r'(?:src|href|action)\s*=\s*["\']http://[^"\']+["\']',
            body, re.IGNORECASE
        )
        if http_resources:
            examples = http_resources[:3]
            result.add(Finding("Mixed Content", f"HTTP resources on HTTPS page ({len(http_resources)})", Severity.MEDIUM,
                f"HTTP resources found: {'; '.join(examples)}.",
                "Load all resources over HTTPS.",
                confidence=Confidence.CONFIRMED))

    # --- External scripts without Subresource Integrity ---
    # NOTE: JS library CVE checks moved to check_js_framework_cve() to avoid double-counting
    external_scripts = re.findall(
        r'<script[^>]*src=["\']https?://[^"\']+["\'][^>]*>',
        body, re.IGNORECASE
    )
    scripts_without_sri = [s for s in external_scripts if "integrity=" not in s.lower()]
    if scripts_without_sri:
        result.add(Finding("SRI", f"External scripts without SRI ({len(scripts_without_sri)})", Severity.LOW,
            "External scripts loaded without Subresource Integrity.",
            "Add integrity= and crossorigin= to external <script> and <link> tags.",
            confidence=Confidence.FIRM))

    # --- E-mail addresses ---
    emails = set(re.findall(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}', body))
    # Drop asset names such as "logo@2x.png" that merely look like addresses.
    emails = {e for e in emails if not e.endswith(('.png', '.jpg', '.gif', '.svg', '.css', '.js'))}
    if emails:
        # Sorted so the reported sample is stable between runs.
        result.add(Finding("HTML Analysis", f"Email addresses on page ({len(emails)})", Severity.INFO,
            f"Emails found: {', '.join(sorted(emails)[:5])}.",
            "Obfuscate emails to protect against spam bots.",
            confidence=Confidence.CONFIRMED))

    # --- <meta name="generator"> ---
    generator = re.search(r'<meta[^>]*name=["\']generator["\'][^>]*content=["\']([^"\']+)', body, re.IGNORECASE)
    if generator:
        result.add(Finding("HTML Analysis", f"Meta generator: {generator.group(1)}", Severity.LOW,
            f"<meta generator> exposes CMS/framework: {generator.group(1)}.",
            "Remove the <meta name=\"generator\"> tag.",
            confidence=Confidence.FIRM))

    # --- Inline event handlers (only reported when there are more than five) ---
    inline_handlers = re.findall(r'\bon\w+\s*=\s*["\']', body, re.IGNORECASE)
    if len(inline_handlers) > 5:
        result.add(Finding("HTML Analysis", f"Inline event handlers ({len(inline_handlers)})", Severity.LOW,
            "Many inline event handlers (onclick, onerror...) increase the XSS attack surface.",
            "Use addEventListener() instead of inline handlers.",
            confidence=Confidence.FIRM))

    # --- Source-map references ---
    if "sourceMappingURL=" in body:
        result.add(Finding("HTML Analysis", "Source maps accessible", Severity.LOW,
            "Source map references found — they expose source code.",
            "Remove sourceMappingURL from production builds.",
            confidence=Confidence.FIRM))

    # --- <base href=...> ---
    base_tags = re.findall(r'<base\s+[^>]*href\s*=', body, re.IGNORECASE)
    if base_tags:
        result.add(Finding("HTML Analysis", "<base> tag detected", Severity.INFO,
            "<base> tag sets the base URL. Combined with XSS, it can hijack resource loading.",
            confidence=Confidence.CONFIRMED))

    # --- target=_blank links without rel=noopener/noreferrer ---
    blank_links = re.findall(r'<a\s[^>]*target=["\']_blank["\'][^>]*>', body, re.IGNORECASE)
    unsafe_blanks = [l for l in blank_links if "noopener" not in l.lower() and "noreferrer" not in l.lower()]
    if unsafe_blanks:
        result.add(Finding("HTML Analysis", f"target=_blank without noopener ({len(unsafe_blanks)})", Severity.LOW,
            "Links with target=_blank without rel=noopener are vulnerable to tab-nabbing.",
            'Add rel="noopener noreferrer" to links with target="_blank".',
            confidence=Confidence.FIRM))


def _version_lt(v1: str, v2: str) -> bool:
    """Compare version strings."""
    try:
        p1 = [int(x) for x in v1.split(".")]
        p2 = [int(x) for x in v2.split(".")]
        return p1 < p2
    except ValueError:
        return False




def check_waf(session: requests.Session, url: str, result: ScanResult):
    """Detect a Web Application Firewall from headers, cookies and blocking.

    A normal request and an obviously malicious one are sent. Vendor
    signatures are searched (case-insensitively, as substrings) in the header
    names, header values and cookies of both responses. If no vendor matches
    but the malicious request is rejected while the normal one succeeds, an
    "Unknown WAF" is reported. Exactly one INFO finding is added unless a
    request fails, in which case nothing is reported.

    Args:
        session: HTTP session used to send the requests.
        url: Target URL.
        result: Scan result that receives the finding.
    """
    waf_signatures = {
        "cloudflare": ["cf-ray", "cf-cache-status", "__cfduid", "cf-request-id"],
        "AWS WAF": ["x-amzn-requestid", "x-amz-cf-id"],
        "Akamai": ["x-akamai-transformed", "akamai-origin-hop"],
        "Sucuri": ["x-sucuri-id", "x-sucuri-cache"],
        "Imperva/Incapsula": ["x-cdn", "incap_ses", "visid_incap"],
        "F5 BIG-IP": ["x-wa-info", "bigipserver"],
        "Barracuda": ["barra_counter_session"],
        "DenyAll": ["sessioncookie"],
        "Fortinet": ["fortiwafsid"],
        "ModSecurity": ["mod_security", "modsecurity"],
    }
    # Strip the trailing slash so the attack URL never becomes "...//?q=".
    base = url.rstrip("/")

    try:
        resp_normal = session.get(url, timeout=10)
        resp_attack = session.get(
            f"{base}/?q=<script>alert(1)</script>&id=' OR 1=1--",
            timeout=10
        )
    except requests.RequestException:
        return  # no verdict without both responses

    headers_all = {}
    headers_all.update({k.lower(): v for k, v in resp_normal.headers.items()})
    headers_all.update({k.lower(): v for k, v in resp_attack.headers.items()})
    cookies_str = str(resp_normal.cookies) + str(resp_attack.cookies)
    # Lower-cased once; every signature is matched against this haystack.
    all_text = (" ".join(headers_all.keys()) + " " + " ".join(headers_all.values())
                + " " + cookies_str).lower()

    # Dict order is preserved and each vendor appears at most once, so the
    # report is stable between runs.
    detected = [
        waf_name
        for waf_name, indicators in waf_signatures.items()
        if any(indicator.lower() in all_text for indicator in indicators)
    ]

    # Blocked response (WAF typically returns 403/406/429)
    blocked = resp_attack.status_code in (403, 406, 429, 503) and resp_normal.status_code == 200
    if blocked and not detected:
        detected.append("Unknown WAF")

    if detected:
        names = ", ".join(detected)
        result.add(Finding("WAF", f"WAF detected: {names}", Severity.INFO,
            f"Site is protected by WAF: {names}.",
            confidence=Confidence.FIRM))
    else:
        result.add(Finding("WAF", "No WAF detected", Severity.INFO,
            "No Web Application Firewall detected.",
            "Consider using a WAF (Cloudflare, ModSecurity, etc.).",
            confidence=Confidence.FIRM))




def check_cms(session: requests.Session, url: str, result: ScanResult):
    """Fingerprint the CMS by probing well-known paths, then run CMS-specific checks.

    A CMS is considered detected when at least two of its signature paths
    answer with the expected status. The first CMS that reaches that score
    wins. For WordPress, additional exposure checks are run.

    Args:
        session: HTTP session used to send the probes.
        url: Target URL; a trailing slash is stripped before paths are appended.
        result: Scan result that receives any findings.
    """
    base = url.rstrip("/")
    detected_cms = None

    cms_checks = {
        "WordPress": [
            ("/wp-login.php", 200),
            ("/wp-includes/js/jquery/jquery.js", 200),
            ("/wp-content/", 200),
            ("/xmlrpc.php", 200),
        ],
        "Joomla": [
            ("/administrator/", 200),
            ("/media/system/js/core.js", 200),
            ("/templates/system/css/system.css", 200),
        ],
        "Drupal": [
            ("/core/misc/drupal.js", 200),
            ("/sites/default/files/", 200),
            ("/CHANGELOG.txt", 200),
        ],
        "Bitrix": [
            ("/bitrix/js/main/core/core.js", 200),
            ("/bitrix/admin/", 200),
        ],
    }

    for cms, checks in cms_checks.items():
        score = 0
        for path, expected_status in checks:
            try:
                resp = session.get(f"{base}{path}", timeout=8, allow_redirects=False)
                # A site that answers 200 for every path would otherwise match
                # the first CMS in the table.
                # NOTE(review): assumes _is_spa_response flags such catch-all
                # pages here as it does for the other path probes — confirm.
                if resp.status_code == expected_status and not _is_spa_response(resp, result):
                    score += 1
            except requests.RequestException:
                continue  # an unreachable path simply does not score
        if score >= 2:
            detected_cms = cms
            break

    if not detected_cms:
        return

    result.add(Finding("CMS", f"CMS detected: {detected_cms}", Severity.INFO,
        f"Site uses {detected_cms}.",
        confidence=Confidence.FIRM))

    if detected_cms == "WordPress":
        _cms_wordpress_checks(session, base, result)


def _cms_wordpress_checks(session, base, result):
    """Run WordPress exposure checks: XML-RPC, REST users, feed version, ?author=."""
    # XML-RPC
    try:
        resp = session.post(f"{base}/xmlrpc.php", timeout=8,
            data='<?xml version="1.0"?><methodCall><methodName>system.listMethods</methodName></methodCall>',
            headers={"Content-Type": "text/xml"})
        if resp.status_code == 200 and "methodResponse" in resp.text:
            result.add(Finding("CMS", "WordPress XML-RPC enabled", Severity.MEDIUM,
                "XML-RPC allows brute-force, DDoS amplification, SSRF.",
                "Disable XML-RPC or restrict access.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass  # best-effort: move on to the next check

    # WP REST API user enumeration
    try:
        resp = session.get(f"{base}/wp-json/wp/v2/users", timeout=8)
        if resp.status_code == 200:
            try:
                users = resp.json()
            except ValueError:
                users = None  # 200 with a non-JSON body: endpoint not exposed
            if isinstance(users, list):
                # Only dict entries can carry a slug; anything else is ignored
                # rather than crashing the scan.
                names = [str(u.get("slug", "")) for u in users[:5] if isinstance(u, dict)]
                if names:
                    result.add(Finding("CMS", "WP REST API exposes users", Severity.MEDIUM,
                        f"Users found: {', '.join(names)}.",
                        "Restrict access to /wp-json/wp/v2/users.",
                        confidence=Confidence.FIRM))
    except requests.RequestException:
        pass

    # WP version in meta/RSS
    try:
        resp = session.get(f"{base}/feed/", timeout=8)
        ver = re.search(r'<generator>https?://wordpress\.org/\?v=([\d.]+)</generator>', resp.text)
        if ver:
            result.add(Finding("CMS", f"WordPress version: {ver.group(1)}", Severity.LOW,
                f"WP version exposed via RSS: {ver.group(1)}.",
                "Hide the WordPress version.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass

    # User enumeration via ?author=
    try:
        resp = session.get(f"{base}/?author=1", timeout=8, allow_redirects=True)
        # A redirect to /author/<slug>/ reveals the login slug of user id 1.
        if "/author/" in resp.url:
            author = resp.url.split("/author/")[-1].strip("/")
            result.add(Finding("CMS", f"WP user enumeration: {author}", Severity.LOW,
                f"User found via ?author=1: {author}.",
                "Install a plugin to block user enumeration.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass




def check_dns(url: str, result: ScanResult):
    """DNS checks: SPF, DMARC, CAA, MX, NS (with zone-transfer attempt), DNSSEC.

    Requires dnspython; without it a single INFO finding is recorded and the
    check ends. The queried domain is the URL's hostname with a leading
    ``www.`` removed.

    A "record missing" finding is only produced when the resolver reports that
    the name or record type does not exist (NXDOMAIN / NoAnswer). Any other
    lookup failure (timeout, no reachable nameserver, ...) is appended to
    ``result.errors`` instead, so that an unreliable resolver is not reported
    as a confirmed misconfiguration.

    Args:
        url: Target URL.
        result: Scan result that receives findings and lookup errors.
    """
    if not HAS_DNS:
        result.add(Finding("DNS", "dnspython module not installed", Severity.INFO,
            "Install dnspython for extended DNS checks: pip install dnspython",
            confidence=Confidence.CONFIRMED))
        return

    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname
    if not domain:
        result.errors.append(f"DNS check skipped: no hostname in {url!r}")
        return
    # Get base domain (strip www)
    if domain.startswith("www."):
        domain = domain[4:]

    resolver = dns.resolver.Resolver()
    resolver.timeout = 5
    resolver.lifetime = 10

    # Resolver outcomes that mean "this record does not exist".
    no_record = (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer)

    def lookup_failed(rtype: str, exc: Exception) -> None:
        """Record an inconclusive lookup instead of guessing a verdict."""
        result.errors.append(f"DNS {rtype} lookup failed for {domain}: {exc}")

    # --- SPF (TXT on the domain) ---
    try:
        answers = resolver.resolve(domain, "TXT")
    except no_record:
        result.add(Finding("DNS/Email", "No SPF record", Severity.MEDIUM,
            "SPF record not found.",
            "Add an SPF TXT record to prevent email spoofing.",
            confidence=Confidence.CONFIRMED))
    except Exception as exc:
        lookup_failed("TXT", exc)
    else:
        spf_found = False
        for rdata in answers:
            txt = rdata.to_text().strip('"')
            if not txt.startswith("v=spf1"):
                continue
            spf_found = True
            if "+all" in txt:
                result.add(Finding("DNS/Email", "SPF +all (allows everyone)", Severity.HIGH,
                    f"SPF: {txt}. +all allows anyone to send on behalf of the domain.",
                    "Use -all or ~all in the SPF record.",
                    confidence=Confidence.CONFIRMED))
            elif "~all" in txt:
                result.add(Finding("DNS/Email", "SPF ~all (soft fail)", Severity.LOW,
                    f"SPF: {txt}. ~all is soft fail, -all recommended.",
                    "Change ~all to -all for strict policy.",
                    confidence=Confidence.CONFIRMED))
            elif "-all" in txt:
                result.add(Finding("DNS/Email", "SPF configured correctly", Severity.INFO,
                    f"SPF: {txt}",
                    confidence=Confidence.CONFIRMED))
            else:
                result.add(Finding("DNS/Email", "SPF without all mechanism", Severity.MEDIUM,
                    f"SPF: {txt}. No all mechanism.",
                    "Add -all to the end of the SPF record.",
                    confidence=Confidence.CONFIRMED))
        if not spf_found:
            result.add(Finding("DNS/Email", "No SPF record", Severity.MEDIUM,
                "Domain has no SPF record — email spoofing possible.",
                "Add TXT record v=spf1 ... -all",
                confidence=Confidence.CONFIRMED))

    # --- DMARC (TXT on _dmarc.<domain>) ---
    try:
        answers = resolver.resolve(f"_dmarc.{domain}", "TXT")
    except no_record:
        result.add(Finding("DNS/Email", "No DMARC record", Severity.MEDIUM,
            "DMARC record not found.",
            "Add a DMARC TXT record to prevent email spoofing.",
            confidence=Confidence.CONFIRMED))
    except Exception as exc:
        lookup_failed("DMARC TXT", exc)
    else:
        dmarc_found = False
        for rdata in answers:
            txt = rdata.to_text().strip('"')
            txt_lower = txt.lower()
            if "v=dmarc1" not in txt_lower:
                continue
            dmarc_found = True
            if "p=none" in txt_lower:
                result.add(Finding("DNS/Email", "DMARC p=none (monitoring only)", Severity.LOW,
                    f"DMARC: {txt}. Policy none — no emails blocked.",
                    "Switch to p=quarantine or p=reject.",
                    confidence=Confidence.CONFIRMED))
            elif "p=quarantine" in txt_lower:
                result.add(Finding("DNS/Email", "DMARC p=quarantine", Severity.INFO,
                    f"DMARC: {txt}.",
                    confidence=Confidence.CONFIRMED))
            elif "p=reject" in txt_lower:
                result.add(Finding("DNS/Email", "DMARC p=reject — good", Severity.INFO,
                    f"DMARC: {txt}.",
                    confidence=Confidence.CONFIRMED))
        if not dmarc_found:
            result.add(Finding("DNS/Email", "No DMARC record", Severity.MEDIUM,
                "DMARC not configured — no email spoofing protection.",
                "Add TXT record _dmarc.domain with v=DMARC1; p=reject.",
                confidence=Confidence.CONFIRMED))

    # --- CAA ---
    try:
        answers = resolver.resolve(domain, "CAA")
    except no_record:
        result.add(Finding("DNS", "No CAA records", Severity.LOW,
            "No CAA records — any CA can issue certificates for this domain.",
            "Add CAA records to restrict authorized CAs.",
            confidence=Confidence.CONFIRMED))
    except Exception as exc:
        lookup_failed("CAA", exc)
    else:
        caa_records = [rdata.to_text() for rdata in answers]
        result.add(Finding("DNS", f"CAA records configured ({len(caa_records)})", Severity.INFO,
            f"CAA: {'; '.join(caa_records[:3])}.",
            confidence=Confidence.CONFIRMED))

    # --- MX (informational only; absence is not reported) ---
    try:
        answers = resolver.resolve(domain, "MX")
    except no_record:
        pass
    except Exception as exc:
        lookup_failed("MX", exc)
    else:
        mx_records = [rdata.to_text() for rdata in answers]
        result.add(Finding("DNS", f"MX records ({len(mx_records)})", Severity.INFO,
            f"MX: {'; '.join(mx_records[:5])}.",
            confidence=Confidence.CONFIRMED))

    # --- NS and zone transfer ---
    ns_list = []
    try:
        ns_list = [rdata.to_text() for rdata in resolver.resolve(domain, "NS")]
    except no_record:
        pass
    except Exception as exc:
        lookup_failed("NS", exc)

    if ns_list:
        if len(ns_list) < 2:
            result.add(Finding("DNS", "Only 1 NS server", Severity.MEDIUM,
                f"NS: {', '.join(ns_list)}. Single NS — no redundancy.",
                "Add at least 2 NS servers.",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("DNS", f"NS servers ({len(ns_list)})", Severity.INFO,
                f"NS: {', '.join(ns_list)}.",
                confidence=Confidence.CONFIRMED))
        try:
            import dns.zone
            import dns.query
        except ImportError:
            pass
        else:
            for ns in ns_list:
                ns_host = ns.rstrip(".")
                try:
                    ns_ip = socket.gethostbyname(ns_host)
                    z = dns.zone.from_xfr(dns.query.xfr(ns_ip, domain, timeout=5))
                except Exception:
                    # A refused or failed AXFR is the expected, healthy outcome.
                    continue
                if z:
                    names = [str(n) for n in z.nodes.keys()]
                    result.add(Finding("DNS", f"Zone transfer allowed on {ns_host}!", Severity.HIGH,
                        f"AXFR from {ns_host} exposes {len(names)} records.",
                        f"Deny zone transfer on {ns_host} for unauthorized servers.",
                        confidence=Confidence.CONFIRMED))
                    break  # one vulnerable nameserver is enough to report

    # --- DNSSEC (presence of DNSKEY records) ---
    try:
        resolver.resolve(domain, "DNSKEY")
    except no_record:
        result.add(Finding("DNS", "DNSSEC not configured", Severity.LOW,
            "DNSSEC not detected — DNS responses not protected from spoofing.",
            "Configure DNSSEC with your domain registrar.",
            confidence=Confidence.CONFIRMED))
    except Exception as exc:
        lookup_failed("DNSKEY", exc)
    else:
        result.add(Finding("DNS", "DNSSEC configured", Severity.INFO,
            "DNSKEY records found — DNSSEC enabled.",
            confidence=Confidence.CONFIRMED))




def check_rate_limiting(session: requests.Session, url: str, result: ScanResult):
    """Check whether the site throttles rapid requests and login attempts.

    First, 20 GET requests are fired at the base URL; a 429 or 503 among the
    responses counts as active rate limiting. If not a single request gets a
    response, no verdict is given and the failure is noted in
    ``result.errors``. Second, the first login-like path that answers is hit
    with 10 dummy POSTs; if none of them is throttled a MEDIUM finding is added.

    Args:
        session: HTTP session used to send the requests.
        url: Target URL; a trailing slash is stripped.
        result: Scan result that receives findings and errors.
    """
    base = url.rstrip("/")

    # Send 20 rapid requests; failed requests carry no information about
    # throttling and are therefore not counted as "passed".
    statuses = []
    for _ in range(20):
        try:
            resp = session.get(base, timeout=5)
            statuses.append(resp.status_code)
        except requests.RequestException:
            continue

    if not statuses:
        result.errors.append("Rate limiting check: no response to any of 20 requests")
    elif any(s in (429, 503) for s in statuses):
        result.add(Finding("Rate Limiting", "Rate limiting active", Severity.INFO,
            "Server limits request rate (429/503).",
            confidence=Confidence.FIRM))
    else:
        result.add(Finding("Rate Limiting", "No rate limiting detected", Severity.LOW,
            "20 rapid requests — all passed without throttling.",
            "Configure rate limiting for brute-force and DDoS protection.",
            confidence=Confidence.FIRM))

    # Check login endpoint: only the first path that answers 200/302 is tested.
    login_paths = ["/login", "/api/login", "/auth/login", "/wp-login.php",
                   "/user/login", "/admin/login", "/account/login", "/signin"]
    for path in login_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=5)
        except requests.RequestException:
            continue
        if resp.status_code not in (200, 302):
            continue

        post_statuses = []
        for _ in range(10):
            try:
                r = session.post(f"{base}{path}", timeout=5,
                    data={"username": "test", "password": "test"})
                post_statuses.append(r.status_code)
            except requests.RequestException:
                continue  # a dropped attempt is neither throttled nor accepted
        if post_statuses and not any(s in (429, 503) for s in post_statuses):
            result.add(Finding("Rate Limiting",
                f"No rate limiting on {path}", Severity.MEDIUM,
                f"Login form {path} does not limit attempts — brute-force possible.",
                "Add rate limiting and CAPTCHA to the login page.",
                confidence=Confidence.FIRM))
        break




def check_http2(session: requests.Session, url: str, result: ScanResult):
    """HTTP/2 and compression check.

    For HTTPS targets a TLS handshake is performed offering ``h2`` and
    ``http/1.1`` through ALPN, and the negotiated protocol is reported.
    Afterwards ``url`` is fetched while advertising gzip/deflate/br and the
    presence of a ``Content-Encoding`` response header is reported.

    Args:
        session: HTTP session used for the compression probe.
        url: Target URL; scheme, host and port are derived from it.
        result: Collector that receives the findings.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    port = parsed.port or (443 if parsed.scheme == "https" else 80)

    # HTTP/2 check via ALPN (only attempted for TLS targets).
    if parsed.scheme == "https":
        try:
            ctx = ssl.create_default_context()
            ctx.set_alpn_protocols(["h2", "http/1.1"])
            with socket.create_connection((host, port), timeout=5) as sock:
                with ctx.wrap_socket(sock, server_hostname=host) as ssock:
                    proto = ssock.selected_alpn_protocol()
        except Exception:
            # Best effort: connection or TLS failures produce no finding here.
            pass
        else:
            if proto == "h2":
                result.add(Finding("Performance", "HTTP/2 supported", Severity.INFO,
                    "Server supports HTTP/2.",
                    confidence=Confidence.CONFIRMED))
            else:
                # Either "http/1.1" was selected or the server negotiated no
                # ALPN protocol at all (None); in both cases h2 was offered
                # and not accepted.
                result.add(Finding("Performance", "No HTTP/2", Severity.LOW,
                    "Server does not support HTTP/2.",
                    "Enable HTTP/2 for better performance.",
                    confidence=Confidence.CONFIRMED))

    try:
        resp = session.get(url, headers={"Accept-Encoding": "gzip, deflate, br"}, timeout=10)
    except requests.RequestException:
        return

    encoding = resp.headers.get("Content-Encoding", "")
    if encoding:
        result.add(Finding("Performance", f"Compression: {encoding}", Severity.INFO,
            f"Server uses compression: {encoding}.",
            confidence=Confidence.CONFIRMED))
    else:
        result.add(Finding("Performance", "No compression", Severity.LOW,
            "Server does not compress responses (gzip/br).",
            "Enable gzip or Brotli compression to save bandwidth.",
            confidence=Confidence.CONFIRMED))




def check_clickjacking(session: requests.Session, url: str, result: ScanResult):
    """Clickjacking protection check.

    Fetches ``url`` and inspects ``X-Frame-Options`` and the
    ``frame-ancestors`` directive of ``Content-Security-Policy``.  A
    comma-separated X-Frame-Options value is reported as invalid and treated
    as absent; if neither mechanism is in place a second finding is added.
    Request failures are ignored.
    """
    try:
        response = session.get(url, timeout=10)
        raw_xfo = response.headers.get("X-Frame-Options", "")
        policy = response.headers.get("Content-Security-Policy", "").lower()
        normalized_xfo = raw_xfo.lower().strip()

        csp_protects = "frame-ancestors" in policy

        if "," in raw_xfo:
            # Several values in one header (e.g. "SAMEORIGIN, DENY") are
            # invalid per RFC 7034, so the header does not count as protection.
            xfo_protects = False
            result.add(Finding("Clickjacking", "Invalid X-Frame-Options: multiple values",
                Severity.MEDIUM,
                f"X-Frame-Options contains multiple values ('{raw_xfo}'). "
                "Browsers may ignore the header entirely.",
                "Use only one value: DENY or SAMEORIGIN. Prefer CSP frame-ancestors.",
                evidence=f"X-Frame-Options: {raw_xfo}",
                confidence=Confidence.CONFIRMED))
        else:
            # Exactly one of DENY, SAMEORIGIN or ALLOW-FROM <origin> is accepted.
            xfo_protects = (normalized_xfo.startswith("allow-from")
                            or normalized_xfo in ("deny", "sameorigin"))

        if xfo_protects or csp_protects:
            return

        shown_xfo = raw_xfo or '(missing)'
        shown_csp = policy[:120] or '(missing)'
        result.add(Finding("Clickjacking", "No clickjacking protection", Severity.MEDIUM,
            "Neither X-Frame-Options nor CSP frame-ancestors are configured.",
            "Add X-Frame-Options: DENY and/or CSP frame-ancestors 'self'.",
            evidence=f"X-Frame-Options: {shown_xfo}\nCSP: {shown_csp}",
            confidence=Confidence.CONFIRMED))
    except requests.RequestException:
        pass




def check_subdomains(url: str, result: ScanResult):
    """Common subdomain enumeration.

    Strips a leading ``www.`` from the URL's hostname, then first resolves a
    made-up label to detect wildcard DNS.  If the wildcard probe resolves, a
    single finding is recorded and enumeration is skipped (every name would
    resolve).  Otherwise a fixed word list is resolved concurrently and each
    hit is reported, with sensitive-looking names raised to LOW severity.
    """
    domain = urllib.parse.urlparse(url).hostname
    if domain.startswith("www."):
        domain = domain[4:]

    # Wildcard detection: a label that should not exist must fail to resolve.
    try:
        probe_label = f"qscan-nonexist-{int(time.time())}"
        wildcard_ip = socket.gethostbyname(f"{probe_label}.{domain}")
    except socket.gaierror:
        wildcard_ip = None

    if wildcard_ip:
        result.add(Finding("DNS", f"Wildcard DNS: *.{domain} -> {wildcard_ip}",
            Severity.LOW,
            f"Any subdomain of {domain} resolves to {wildcard_ip}. "
            "Wildcard DNS increases attack surface (subdomain takeover, phishing).",
            "Remove wildcard (*) DNS record if not needed.",
            confidence=Confidence.FIRM))
        return

    candidates = (
        "www", "mail", "ftp", "admin", "api", "dev", "staging", "test",
        "beta", "demo", "portal", "vpn", "remote", "git", "gitlab",
        "jenkins", "ci", "cd", "monitoring", "grafana", "kibana",
        "elastic", "db", "database", "mysql", "postgres", "redis",
        "cache", "cdn", "static", "media", "files", "upload",
        "backup", "old", "new", "v2", "app", "mobile", "m",
        "shop", "store", "blog", "forum", "wiki", "docs",
        "support", "help", "status", "panel", "cpanel", "webmail",
        "ns1", "ns2", "mx", "smtp", "pop", "imap",
    )
    # Labels whose mere existence is worth more than an informational note.
    sensitive = {
        "admin", "staging", "test", "dev", "beta", "demo",
        "git", "gitlab", "jenkins", "backup", "old",
        "db", "database", "mysql", "postgres", "redis",
        "phpmyadmin", "cpanel", "panel",
    }

    def resolve(label):
        """Return (label, fqdn, ip) when the name resolves, otherwise None."""
        name = f"{label}.{domain}"
        try:
            return (label, name, socket.gethostbyname(name))
        except socket.gaierror:
            return None

    with ThreadPoolExecutor(max_workers=20) as pool:
        lookups = [pool.submit(resolve, label) for label in candidates]
        hits = [hit for hit in (job.result() for job in as_completed(lookups)) if hit]

    # Completion order is arbitrary; sort so findings are emitted deterministically.
    for label, name, address in sorted(hits):
        if label in sensitive:
            level, certainty = Severity.LOW, Confidence.FIRM
        else:
            level, certainty = Severity.INFO, Confidence.CONFIRMED
        result.add(Finding("Subdomains", f"{name} -> {address}", level,
            f"Subdomain {name} resolves to {address}.",
            confidence=certainty))




def check_response_time(session: requests.Session, url: str, result: ScanResult):
    """Response time analysis.

    Issues three sequential GET requests to ``url`` and averages the time of
    those that succeeded.  An average above 5 s is reported as MEDIUM, above
    2 s as LOW, anything else as INFO.  If every request fails, nothing is
    reported.

    Args:
        session: HTTP session used for the probes.
        url: Target URL.
        result: Collector that receives the finding.
    """
    times = []
    for _ in range(3):
        try:
            # perf_counter is monotonic, so wall-clock adjustments during the
            # request cannot distort (or negate) the measured duration.
            start = time.perf_counter()
            session.get(url, timeout=15)
            times.append(time.perf_counter() - start)
        except requests.RequestException:
            # A failed request is simply excluded from the average.
            pass

    if not times:
        return

    avg = sum(times) / len(times)
    if avg > 5:
        result.add(Finding("Performance", f"Slow response: {avg:.1f}s", Severity.MEDIUM,
            f"Average response time {avg:.1f}s — server overloaded or poorly optimized.",
            "Check server load, add caching.",
            confidence=Confidence.CONFIRMED))
    elif avg > 2:
        result.add(Finding("Performance", f"Response: {avg:.1f}s", Severity.LOW,
            f"Average response time {avg:.1f}s.",
            "Consider optimization and CDN.",
            confidence=Confidence.CONFIRMED))
    else:
        result.add(Finding("Performance", f"Response: {avg:.2f}s", Severity.INFO,
            f"Average response time {avg:.2f}s — good.",
            confidence=Confidence.CONFIRMED))




def check_subdomain_takeover(session: requests.Session, url: str, result: ScanResult):
    """CNAME-based subdomain takeover check.

    Looks up the CNAME record of the URL's hostname.  When the target belongs
    to one of the known hosting services, the target is fetched over HTTP and
    its body is searched for typical "resource does not exist" messages.  A
    match is reported as a possible takeover (HIGH); a target that cannot be
    fetched is reported as MEDIUM.

    The check is skipped when dnspython is unavailable (``HAS_DNS`` false) and
    is best effort: any resolution error ends it silently.

    Args:
        session: HTTP session used to probe the CNAME target.
        url: Target URL; only its hostname is used.
        result: Collector that receives the findings.
    """
    if not HAS_DNS:
        return

    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname

    # Registrable suffix of the hosting service -> human-readable name.
    takeover_signatures = {
        "github.io": "GitHub Pages",
        "herokuapp.com": "Heroku",
        "s3.amazonaws.com": "AWS S3",
        "cloudfront.net": "AWS CloudFront",
        "azurewebsites.net": "Azure",
        "trafficmanager.net": "Azure Traffic Manager",
        "blob.core.windows.net": "Azure Blob",
        "cloudapp.net": "Azure Cloud",
        "netlify.app": "Netlify",
        "vercel.app": "Vercel",
        "firebaseapp.com": "Firebase",
        "web.app": "Firebase",
        "pantheonsite.io": "Pantheon",
        "ghost.io": "Ghost",
        "myshopify.com": "Shopify",
        "zendesk.com": "Zendesk",
        "surge.sh": "Surge.sh",
        "bitbucket.io": "Bitbucket",
        "ghost.org": "Ghost",
        "helpjuice.com": "HelpJuice",
        "helpscoutdocs.com": "HelpScout",
        "readthedocs.io": "ReadTheDocs",
        "teamwork.com": "Teamwork",
        "thinkific.com": "Thinkific",
        "tilda.ws": "Tilda",
        "wordpress.com": "WordPress.com",
        "fly.dev": "Fly.io",
    }

    # Typical takeover response patterns (matched against the lower-cased body).
    takeover_texts = [
        "there isn't a github pages site here",
        "no such app", "heroku | no such app",
        "nosuchbucket", "the specified bucket does not exist",
        "this site can't be reached", "not found",
        "project not found", "page not found",
    ]

    def service_for(cname):
        """Return the service whose domain ``cname`` is, or is a subdomain of."""
        # DNS names are case-insensitive, and the match must respect label
        # boundaries: "evilgithub.io" must not be treated as "github.io".
        name = cname.lower()
        for pattern, service in takeover_signatures.items():
            if name == pattern or name.endswith("." + pattern):
                return service
        return None

    resolver = dns.resolver.Resolver()
    resolver.timeout = 5

    try:
        answers = resolver.resolve(domain, "CNAME")
        for rdata in answers:
            cname = str(rdata.target).rstrip(".")
            service = service_for(cname)
            if service is None:
                continue
            try:
                resp = session.get(f"http://{cname}", timeout=5)
            except requests.RequestException:
                result.add(Finding("Subdomain Takeover",
                    f"CNAME to {service}: {cname} (not responding)", Severity.MEDIUM,
                    f"CNAME points to {service} ({cname}), but host is not responding.",
                    f"Check the resource status in {service}.",
                    confidence=Confidence.FIRM))
                continue

            body_lower = resp.text.lower()
            if any(text in body_lower for text in takeover_texts):
                result.add(Finding("Subdomain Takeover",
                    f"Possible takeover: {domain} -> {cname}", Severity.HIGH,
                    f"CNAME points to {service} ({cname}), "
                    "but resource does not exist — subdomain takeover possible.",
                    f"Remove the CNAME record or create the resource in {service}.",
                    confidence=Confidence.FIRM))
    except Exception:
        # Best effort: no CNAME record, NXDOMAIN, timeouts etc. mean there is
        # nothing to report.
        pass




def check_websocket(session: requests.Session, url: str, result: ScanResult):
    """WebSocket endpoint check.

    Sends a WebSocket upgrade handshake to a list of common paths.  The first
    path answering ``101`` is reported and then re-probed with a foreign
    ``Origin`` header; a second ``101`` is reported as a missing Origin check.
    A ``400`` response whose body mentions "upgrade" is also reported as a
    detected endpoint.  Scanning stops at the first detected endpoint.

    Args:
        session: HTTP session used for the handshake requests.
        url: Base URL of the target.
        result: Collector that receives the findings.
    """
    base = url.rstrip("/")
    ws_paths = ["/ws", "/websocket", "/socket.io/", "/sockjs/", "/cable",
                "/hub", "/signalr/", "/realtime", "/live"]

    handshake_headers = {
        "Upgrade": "websocket",
        "Connection": "Upgrade",
        "Sec-WebSocket-Key": "dGhlIHNhbXBsZSBub25jZQ==",
        "Sec-WebSocket-Version": "13",
    }
    # Same handshake, but pretending to come from an unrelated site.
    foreign_origin_headers = {**handshake_headers, "Origin": "https://evil.qscan.test"}

    for path in ws_paths:
        try:
            ws_url = f"{base}{path}"
            resp = session.get(ws_url, headers=handshake_headers, timeout=5)

            if resp.status_code == 101:
                result.add(Finding("WebSocket", f"WebSocket endpoint: {path}", Severity.INFO,
                    f"WebSocket available at {path}.",
                    confidence=Confidence.CONFIRMED))
                resp2 = session.get(ws_url, headers=foreign_origin_headers, timeout=5)
                if resp2.status_code == 101:
                    result.add(Finding("WebSocket", f"WebSocket {path} without Origin check", Severity.MEDIUM,
                        "WebSocket accepts connections from any Origin.",
                        "Validate Origin during WebSocket handshake.",
                        confidence=Confidence.TENTATIVE))
                break
            elif resp.status_code == 400 and "upgrade" in resp.text.lower():
                result.add(Finding("WebSocket", f"WebSocket endpoint: {path}", Severity.INFO,
                    f"WebSocket detected at {path} (400 Bad Request — proper handshake needed).",
                    confidence=Confidence.CONFIRMED))
                break
        except requests.RequestException:
            # An unreachable path is not an endpoint; try the next one.
            pass




def check_user_enumeration(session: requests.Session, url: str, result: ScanResult):
    """User enumeration via login response differences.

    For each candidate login path, posts wrong credentials once for a
    username that should not exist and once for ``admin``.  When both answers
    share the same status (200/401/403), are not plain HTML, and their bodies
    differ in length by more than 20 characters, a finding is recorded and the
    scan stops.
    """
    root = url.rstrip("/")
    candidates = ("/login", "/api/login", "/auth/login", "/signin",
                  "/account/login", "/user/login")

    for path in candidates:
        endpoint = f"{root}{path}"
        try:
            unknown_user = session.post(
                endpoint, timeout=8, allow_redirects=False,
                data={"username": "qscan_nonexist_user_12345", "password": "wrong"})
            # _is_spa_response is defined elsewhere in this file; presumably it
            # recognises catch-all single-page-app responses.
            if _is_spa_response(unknown_user, result):
                continue
            likely_user = session.post(
                endpoint, timeout=8, allow_redirects=False,
                data={"username": "admin", "password": "wrong"})
            if _is_spa_response(likely_user, result):
                continue

            status = unknown_user.status_code
            if status != likely_user.status_code or status not in (200, 401, 403):
                continue

            content_type = unknown_user.headers.get("Content-Type", "").lower()
            if "application/json" not in content_type and "text/html" in content_type:
                continue

            # A length gap above 20 already implies the bodies are not equal.
            length_gap = abs(len(unknown_user.text.lower()) - len(likely_user.text.lower()))
            if length_gap <= 20:
                continue

            result.add(Finding("User Enumeration",
                f"Different responses on {path}", Severity.MEDIUM,
                "Responses differ for existing vs non-existing users — "
                "allows account enumeration.",
                "Use identical responses for invalid login/password.",
                confidence=Confidence.TENTATIVE))
            break
        except requests.RequestException:
            pass




def check_debug_endpoints(session: requests.Session, url: str, result: ScanResult):
    """Debug endpoints, server timing, profiler.

    Probes a fixed list of well-known debug/diagnostic paths concurrently and
    reports those that answer 200 with a body that does not look like a
    soft-404 or an ordinary user-facing page.  Finally checks the main page
    for a ``Server-Timing`` response header.
    """
    import hashlib as _hl2

    root = url.rstrip("/")
    debug_paths = [
        ("/_debug/", "Debug toolbar"),
        ("/_profiler/", "Symfony Profiler"),
        ("/__debug__/", "Django Debug Toolbar"),
        ("/debug/default/", "Yii Debug"),
        ("/telescope", "Laravel Telescope"),
        ("/horizon", "Laravel Horizon"),
        ("/_debugbar/", "Laravel Debugbar"),
        ("/actuator", "Spring Boot Actuator"),
        ("/actuator/health", "Spring Health"),
        ("/actuator/env", "Spring Env (secrets!)"),
        ("/actuator/configprops", "Spring Config"),
        ("/actuator/mappings", "Spring Mappings"),
        ("/console", "H2 Console / DevTools"),
        ("/metrics", "Metrics endpoint"),
        ("/health", "Health check"),
        ("/healthz", "Kubernetes health"),
        ("/readyz", "Kubernetes ready"),
        ("/env", "Environment endpoint"),
        ("/info", "Info endpoint"),
        ("/__diagnostics", "Diagnostics"),
        ("/diag", "Diagnostics"),
        ("/status", "Status page"),
    ]

    critical_paths = {"/actuator/env", "/actuator/configprops", "/env"}
    high_paths = {"/_profiler/", "/__debug__/", "/telescope", "/_debugbar/",
                  "/actuator", "/console", "/horizon"}
    # Generic paths: only reported when the response is not HTML.
    soft_paths = {"/info", "/status", "/health", "/healthz", "/readyz", "/metrics"}

    # Words suggesting a genuine debug/admin page rather than a public one.
    debug_indicators = [
        "actuator", "spring", "boot", "profiler", "debugbar", "telescope",
        "horizon", "laravel", "symfony", "django", "h2 console", "h2-console",
        "database console", "bean", "configprops", "mappings", "env",
        "jvm", "heap", "thread", "dump", "trace", "logfile",
        "db_", "password", "secret", "token", "dsn", "jdbc",
    ]
    # Markup typical of a regular site page with navigation chrome.
    navigation_markers = ["<nav", "navbar", "footer", "sign in",
                          "log in", "sign up", "copyright"]

    # Baseline: how does the site answer a path that cannot exist?
    try:
        baseline = session.get(f"{root}/_qz_nonexist_debug_check_7291/",
                               timeout=8, allow_redirects=False)
        if baseline.status_code == 200:
            soft404_hash = _hl2.sha256(baseline.content).hexdigest()
            soft404_size = len(baseline.content)
        else:
            soft404_hash, soft404_size = None, 0
    except requests.RequestException:
        soft404_hash, soft404_size = None, 0

    def probe(entry):
        """Fetch one debug path; return its details or None on request failure."""
        probe_path, label = entry
        try:
            answer = session.get(f"{root}{probe_path}", timeout=8, allow_redirects=False)
            return (probe_path, label, answer.status_code, len(answer.content), answer)
        except requests.RequestException:
            return None

    with ThreadPoolExecutor(max_workers=10) as pool:
        jobs = [pool.submit(probe, entry) for entry in debug_paths]
        for job in as_completed(jobs):
            outcome = job.result()
            if outcome is None:
                continue
            path, desc, status, size, resp = outcome
            if status != 200 or size <= 50:
                continue
            if _is_spa_response(resp, result):
                continue

            # Soft-404 detection: identical body, or nearly the same size as
            # the baseline page.
            if soft404_hash and _hl2.sha256(resp.content).hexdigest() == soft404_hash:
                continue
            if soft404_size > 500 and abs(size - soft404_size) / max(soft404_size, 1) < 0.05:
                continue

            is_html = "text/html" in resp.headers.get("Content-Type", "").lower()
            if is_html:
                if path in soft_paths:
                    continue
                snippet = resp.text[:2000].lower()
                # Large HTML with site navigation is most likely user-facing.
                if size > 10000 and any(marker in snippet for marker in navigation_markers):
                    continue
                # HTML must mention at least one debug indicator to count.
                if not any(word in snippet for word in debug_indicators):
                    continue

            sev = (Severity.CRITICAL if path in critical_paths
                   else Severity.HIGH if path in high_paths
                   else Severity.MEDIUM)
            result.add(Finding("Debug/Profiler", f"Accessible: {path} ({desc})", sev,
                f"{desc} publicly accessible ({size} bytes).",
                f"Restrict access to {path} or disable in production.",
                confidence=Confidence.FIRM))

    # Server-Timing header on the main page
    try:
        resp = session.get(url, timeout=10)
        if any(name.lower() == "server-timing" for name in resp.headers):
            result.add(Finding("Debug/Profiler", "Server-Timing header", Severity.LOW,
                f"Server-Timing: {resp.headers['Server-Timing'][:100]} — exposes internal metrics.",
                "Remove Server-Timing in production.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass




def check_content_type(session: requests.Session, url: str, result: ScanResult):
    """Content-Type mismatch check.

    Fetches a few well-known static files and reports those served (status
    200) with a ``Content-Type`` that does not start with one of the media
    types accepted for that file.  Responses without a Content-Type header
    and failed requests are ignored.

    Args:
        session: HTTP session used for the requests.
        url: Base URL of the target.
        result: Collector that receives the findings.
    """
    base = url.rstrip("/")
    # path -> accepted media-type prefixes; the first one is shown in the
    # finding text.  application/xml is a valid alternative for sitemaps only.
    checks = [
        ("/robots.txt", ("text/plain",)),
        ("/sitemap.xml", ("text/xml", "application/xml")),
        ("/favicon.ico", ("image/",)),
    ]
    for path, accepted_prefixes in checks:
        try:
            resp = session.get(f"{base}{path}", timeout=8)
        except requests.RequestException:
            continue
        if resp.status_code != 200:
            continue

        ct = resp.headers.get("Content-Type", "")
        # Media types are case-insensitive, so compare a normalised copy and
        # keep the raw header value for the report.
        media_type = ct.strip().lower()
        if media_type and not media_type.startswith(accepted_prefixes):
            result.add(Finding("Content-Type", f"Wrong Content-Type for {path}", Severity.LOW,
                f"{path} served as {ct}, expected {accepted_prefixes[0]}*.",
                f"Set correct Content-Type for {path}.",
                confidence=Confidence.CONFIRMED))




def check_backup_files(session: requests.Session, url: str, result: ScanResult):
    """Backup file discovery by extension.

    Builds candidate URLs by appending common backup/editor/dump suffixes to
    ``/index`` and to the path of ``url`` (when different), requests them
    concurrently without following redirects, and reports every candidate
    that answers 200 with a non-empty body.
    """
    target = urllib.parse.urlparse(url)
    origin = f"{target.scheme}://{target.hostname}"
    if target.port:
        origin += f":{target.port}"

    # An empty path falls back to "/index", which is always probed anyway.
    page_path = target.path.rstrip("/") or "/index"
    stems = ["/index"] if page_path == "/index" else ["/index", page_path]

    suffixes = (
        ".php.bak", ".php~", ".php.old", ".php.save", ".php.swp", ".php.swo",
        ".bak", ".old", ".save", ".orig", ".copy", ".tmp",
        ".html.bak", ".html~", ".html.old",
        ".asp.bak", ".aspx.bak",
        ".py.bak", ".py~",
        ".conf.bak", ".conf~",
        ".sql", ".sql.gz", ".sql.bz2",
    )
    candidates = [f"{origin}{stem}{suffix}" for stem in stems for suffix in suffixes]

    def fetch(candidate):
        """Return (url, status, body size, response), or None if the request failed."""
        try:
            reply = session.get(candidate, timeout=8, allow_redirects=False)
        except requests.RequestException:
            return None
        return (candidate, reply.status_code, len(reply.content), reply)

    with ThreadPoolExecutor(max_workers=10) as pool:
        jobs = [pool.submit(fetch, candidate) for candidate in candidates]
        for job in as_completed(jobs):
            outcome = job.result()
            if not outcome:
                continue
            found_url, status, size, reply = outcome
            if status != 200 or size <= 0:
                continue
            if _is_spa_response(reply, result):
                continue
            result.add(Finding("Backup Files", f"Found: {found_url}", Severity.HIGH,
                f"Backup file publicly accessible ({size} bytes).",
                "Remove the backup file or restrict access.",
                confidence=Confidence.FIRM))




def check_error_handling(session: requests.Session, url: str, result: ScanResult):
    """Error handling check.

    Sends a few malformed or oversized requests and reports any error
    response (status >= 400) whose body is longer than 200 characters and
    contains a keyword typical of stack traces or framework/database error
    pages.  A 500 response is reported as HIGH, other statuses as MEDIUM.

    Args:
        session: HTTP session used for the probes.
        url: Base URL of the target.
        result: Collector that receives the findings.
    """
    base = url.rstrip("/")

    error_triggers = [
        (f"{base}/%ff", "Invalid URL encoding"),
        # Only the path segment is repeated.  Multiplying the whole f-string
        # would repeat the scheme and host as well.
        (f"{base}{'/a' * 500}", "Very long URL"),
        (f"{base}/?{'a=1&' * 200}", "Many parameters"),
    ]

    # Keywords indicating verbose error disclosure
    debug_keywords = [
        "traceback", "stack trace", "stacktrace", "exception", "debug",
        "internal server error", "syntax error", "fatal error",
        "unhandled exception", "runtime error", "segfault",
        "at line ", "file \"", "in module", "raise ",
        "sqlstate", "mysql", "pg_query", "ora-", "odbc",
        "asp.net", "system.web", "server error in",
        "django.core", "flask.app", "express.js",
        "werkzeug", "laravel", "symfony",
    ]

    for test_url, desc in error_triggers:
        try:
            resp = session.get(test_url, timeout=8)
        except requests.RequestException:
            continue
        if resp.status_code < 400:
            continue

        body_lower = resp.text.lower()
        if len(resp.text) > 200 and any(kw in body_lower for kw in debug_keywords):
            sev = Severity.HIGH if resp.status_code == 500 else Severity.MEDIUM
            result.add(Finding("Error Handling",
                f"{resp.status_code} with debug info ({desc})", sev,
                f"URL ‘{desc}’ triggers {resp.status_code} with verbose error disclosure.",
                "Configure error handling to show generic error pages without technical details.",
                confidence=Confidence.FIRM))




def check_robots_analysis(session: requests.Session, url: str, result: ScanResult):
    """robots.txt analysis: hidden paths, Disallow directives.

    Downloads ``/robots.txt`` and collects its ``Disallow`` paths (ignoring
    empty values and a bare ``/``).  Paths containing an "interesting"
    keyword are requested, and those answering 200 are reported as accessible
    hidden paths.  A summary finding lists either the interesting paths or,
    when there are none, the plain Disallow directives.

    Args:
        session: HTTP session used for the requests.
        url: Base URL of the target.
        result: Collector that receives the findings.
    """
    base = url.rstrip("/")
    try:
        resp = session.get(f"{base}/robots.txt", timeout=8)
        if resp.status_code != 200:
            return

        disallowed = []
        for line in resp.text.splitlines():
            # robots.txt allows "#" comments, including trailing ones on a
            # rule line; drop them so they do not become part of the path.
            line = line.split("#", 1)[0].strip()
            if line.lower().startswith("disallow:"):
                path = line.split(":", 1)[1].strip()
                if path and path != "/":
                    disallowed.append(path)

        interesting = [
            "admin", "api", "private", "secret", "backup", "config",
            "database", "db", "debug", "dev", "staging", "test",
            "internal", "panel", "dashboard", "manage", "cgi-bin",
            "wp-", "phpmyadmin", "cpanel", "login", "auth",
            "upload", "tmp", "temp", "log", "old", "hidden",
        ]

        hidden_paths = []
        seen_paths = set()
        for path in disallowed:
            # Dedup and skip wildcard paths (they cannot be requested literally)
            if path in seen_paths or "*" in path:
                continue
            seen_paths.add(path)
            path_lower = path.lower()
            if not any(kw in path_lower for kw in interesting):
                continue
            hidden_paths.append(path)

            # Stop probing once 10 accessible hidden paths have been recorded.
            reported = sum(1 for f in result.findings
                           if f.category == "robots.txt" and "Hidden path" in f.title)
            if reported >= 10:
                continue
            try:
                r2 = session.get(f"{base}{path}", timeout=5, allow_redirects=False)
                if r2.status_code == 200 and not _is_spa_response(r2, result):
                    result.add(Finding("robots.txt", f"Hidden path accessible: {path}",
                        Severity.LOW,
                        f"Path {path} is hidden in robots.txt but accessible (200 OK).",
                        f"If {path} should not be public, restrict access.",
                        confidence=Confidence.FIRM))
            except requests.RequestException:
                # An unreachable path is simply not reported as accessible.
                pass

        if hidden_paths:
            result.add(Finding("robots.txt", f"Interesting Disallow paths ({len(hidden_paths)})",
                Severity.LOW,
                f"robots.txt hides: {', '.join(hidden_paths[:10])}.",
                "Verify hidden paths are truly inaccessible.",
                confidence=Confidence.CONFIRMED))

        if disallowed and not hidden_paths:
            paths_list = ", ".join(disallowed[:15])
            result.add(Finding("robots.txt", f"Disallow directives ({len(disallowed)})",
                Severity.INFO, f"robots.txt disallows: {paths_list}.",
                evidence=resp.text[:500],
                confidence=Confidence.CONFIRMED))

    except requests.RequestException:
        pass




def check_graphql(session: requests.Session, url: str, result: ScanResult):
    """GraphQL: introspection, IDE, debug.

    For each common GraphQL path that answers a GET with 200, three things
    are checked: whether the page is a GraphQL IDE, whether an introspection
    query returns the schema, and whether an invalid field triggers
    "did you mean" style suggestions.  Scanning stops at the first path with
    introspection enabled.

    Args:
        session: HTTP session used for the requests.
        url: Base URL of the target.
        result: Collector that receives the findings.
    """
    base = url.rstrip("/")
    gql_paths = ["/graphql", "/graphql/", "/api/graphql", "/v1/graphql",
                 "/gql", "/query", "/api/query"]

    introspection_query = '{"query":"{ __schema { types { name } } }"}'
    json_headers = {"Content-Type": "application/json"}

    def json_object(response):
        """Return the body as a dict, or None when it is not a JSON object."""
        try:
            payload = response.json()
        except ValueError:
            return None
        # Servers may answer with null, a list or a bare string; only an
        # object can carry "data"/"errors".
        return payload if isinstance(payload, dict) else None

    for path in gql_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=8)
            if resp.status_code != 200 or _is_spa_response(resp, result):
                continue
            body_lower = resp.text.lower()

            is_gql_ide = any(kw in body_lower for kw in [
                "graphiql", "graphql playground", "graphql ide",
                "explorerpage", "graphql-playground"
            ])
            # Skip if it's a large user-facing page (not a standalone IDE)
            if is_gql_ide and len(resp.text) > 20000 and any(
                    nav in body_lower for nav in ["<nav", "navbar", "footer", "sign in"]):
                is_gql_ide = False
            if is_gql_ide:
                result.add(Finding("GraphQL", f"GraphQL IDE accessible: {path}",
                    Severity.MEDIUM,
                    f"GraphQL IDE ({path}) is publicly accessible.",
                    "Disable GraphQL IDE in production.",
                    confidence=Confidence.FIRM))

            # Introspection check
            resp2 = session.post(f"{base}{path}", timeout=8,
                data=introspection_query, headers=json_headers)
            if resp2.status_code == 200:
                payload = json_object(resp2)
                data_field = payload.get("data") if payload is not None else None
                schema = data_field.get("__schema") if isinstance(data_field, dict) else None
                # Only a real "__schema" object counts as exposed introspection.
                if isinstance(schema, dict):
                    types = schema.get("types")
                    if not isinstance(types, list):
                        types = []
                    names = [t.get("name") for t in types[:10] if isinstance(t, dict)]
                    type_names = [name for name in names
                                  if isinstance(name, str) and not name.startswith("__")]
                    result.add(Finding("GraphQL", f"Introspection enabled: {path}",
                        Severity.HIGH,
                        f"GraphQL introspection exposes API schema. Types: {', '.join(type_names[:5])}...",
                        "Disable introspection in production.",
                        confidence=Confidence.FIRM))
                    break

            # Field suggestion disclosure
            resp3 = session.post(f"{base}{path}", timeout=8,
                data='{"query":"{ __nonexistent }"}', headers=json_headers)
            if resp3.status_code == 200:
                payload = json_object(resp3)
                errors = payload.get("errors") if payload is not None else None
                if not isinstance(errors, list):
                    errors = []
                for err in errors:
                    msg = err.get("message", "") if isinstance(err, dict) else ""
                    if not isinstance(msg, str):
                        continue
                    if "did you mean" in msg.lower() or "suggestion" in msg.lower():
                        result.add(Finding("GraphQL", f"GraphQL field suggestions: {path}",
                            Severity.LOW,
                            f"GraphQL suggests fields: {msg[:200]}.",
                            "Disable field suggestions in production.",
                            confidence=Confidence.FIRM))
                        break

        except requests.RequestException:
            pass




def check_js_secrets(session: requests.Session, url: str, result: ScanResult):
    """Scan inline and external JavaScript for hard-coded API keys and secrets.

    Fetches ``url``, gathers the bodies of inline ``<script>`` blocks plus up to
    10 external ``.js`` files referenced via ``src=``, and runs a list of secret
    regexes over the combined text. Each distinct hit is added to ``result`` as
    a HIGH or CRITICAL finding (at most two per pattern). If nothing is found
    and at least one external file was downloaded, a single INFO finding is
    added instead.

    Args:
        session: HTTP session used for all requests.
        url: Page whose scripts are inspected.
        result: Scan result the findings are appended to.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    # Extract JS file references
    js_urls = set(re.findall(r'src=["\']([^"\']*\.js(?:\?[^"\']*)?)["\']', resp.text, re.IGNORECASE))

    # Secret patterns: (regex, name, case_sensitive)
    # case_sensitive=True — pattern checked without IGNORECASE (for tokens with fixed prefix)
    secret_patterns = [
        (r'(?:api[_-]?key|apikey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "API Key", False),
        (r'(?:api[_-]?secret|apisecret)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "API Secret", False),
        (r'(?:secret[_-]?key|secretkey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "Secret Key", False),
        (r'(?:access[_-]?token|accesstoken)\s*[:=]\s*["\']([a-zA-Z0-9_\-\.]{20,})["\']', "Access Token", False),
        (r'(?:private[_-]?key|privatekey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']', "Private Key", False),
        (r'(?:auth[_-]?token|authtoken)\s*[:=]\s*["\']([a-zA-Z0-9_\-\.]{20,})["\']', "Auth Token", False),
        (r'(?:password|passwd|pwd)\s*[:=]\s*["\']([^"\']{6,})["\']', "Password", False),
        (r'AKIA[0-9A-Z]{16}', "AWS Access Key ID", True),
        (r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}', "GitHub Token", True),
        (r'sk-[a-zA-Z0-9]{20,}', "OpenAI/Stripe Secret Key", True),
        (r'pk_(?:live|test)_[a-zA-Z0-9]{20,}', "Stripe Publishable Key", True),
        (r'sk_(?:live|test)_[a-zA-Z0-9]{20,}', "Stripe Secret Key", True),
        (r'AIza[0-9A-Za-z\-_]{35}', "Google API Key", True),
        (r'ya29\.[0-9A-Za-z\-_]+', "Google OAuth Token", True),
        (r'xox[bpors]-[0-9a-zA-Z\-]{10,}', "Slack Token", True),
        (r'https://hooks\.slack\.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}', "Slack Webhook", True),
        (r'[0-9]+-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com', "Google OAuth Client ID", True),
        (r'-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----', "Private Key (PEM)", True),
        (r'eyJ[A-Za-z0-9_\-]*\.eyJ[A-Za-z0-9_\-]*\.[A-Za-z0-9_\-]*', "JWT Token", True),
        (r'(?:mongodb(?:\+srv)?://)[^\s"\'<>]{10,}', "MongoDB Connection String", False),
        (r'(?:mysql|postgres|postgresql)://[^\s"\'<>]{10,}', "Database Connection String", False),
        (r'(?:redis)://[^\s"\'<>]{10,}', "Redis Connection String", False),
        (r'SG\.[a-zA-Z0-9_\-]{22}\.[a-zA-Z0-9_\-]{43}', "SendGrid API Key", True),
        (r'(?:bearer|token)\s+[a-zA-Z0-9_\-\.]{20,}', "Bearer Token", False),
    ]

    # Substrings that mark a match as an obvious placeholder rather than a secret.
    placeholder_markers = (
        "example", "placeholder", "your_", "xxx", "test", "dummy",
        "change_me", "insert", "todo", "sample", "demo",
    )
    # Secret types whose leak is treated as CRITICAL; everything else is HIGH.
    critical_names = {
        "AWS Access Key ID", "Private Key (PEM)",
        "Database Connection String", "MongoDB Connection String",
        "Stripe Secret Key", "GitHub Token",
    }
    max_reports_per_pattern = 2

    inline_scripts = re.findall(r'<script[^>]*>(.*?)</script>', resp.text, re.DOTALL | re.IGNORECASE)
    all_js_content = "\n".join(inline_scripts)

    # Check external JS (max 10)
    checked = 0
    for js_url in list(js_urls)[:10]:
        if not js_url.startswith(("http://", "https://")):
            js_url = urllib.parse.urljoin(url, js_url)
        try:
            js_resp = session.get(js_url, timeout=8)
            # Very large bundles are skipped to keep the regex pass bounded.
            if js_resp.status_code == 200 and len(js_resp.text) < 5_000_000:
                all_js_content += "\n" + js_resp.text
                checked += 1
        except requests.RequestException:
            pass

    found_secrets = set()
    for pattern, name, case_sensitive in secret_patterns:
        flags = 0 if case_sensitive else re.IGNORECASE
        reported = 0
        for match in re.findall(pattern, all_js_content, flags):
            # The per-pattern cap counts reported secrets only, so placeholder
            # hits at the start of the file cannot hide real ones further down.
            if reported >= max_reports_per_pattern:
                break
            match_str = match if isinstance(match, str) else (match[0] if match else "")
            if not match_str:
                continue
            # Skip obvious placeholders
            lowered = match_str.lower()
            if any(marker in lowered for marker in placeholder_markers):
                continue
            key = f"{name}: {match_str[:30]}..."
            if key in found_secrets:
                continue
            found_secrets.add(key)
            reported += 1
            sev = Severity.CRITICAL if name in critical_names else Severity.HIGH
            result.add(Finding("Secrets in JS", f"{name} in JavaScript", sev,
                f"Found {name}: {match_str[:40]}...",
                f"Immediately revoke and rotate {name}. Do not store secrets in client-side JS.",
                evidence=f"Match: {match_str[:100]}",
                confidence=Confidence.FIRM))

    if not found_secrets and checked > 0:
        result.add(Finding("Secrets in JS", f"Checked {checked} JS files", Severity.INFO,
            "No secrets found in JavaScript.",
            confidence=Confidence.CONFIRMED))




def check_403_bypass(session: requests.Session, url: str, result: ScanResult):
    """Try common 403-bypass techniques against a fixed list of admin-like paths.

    For every test path that answers 403 to a plain GET, a series of path
    mutations, spoofed headers and a POST are attempted. The first variant that
    returns 200 with more than 100 bytes (and is not recognised as an SPA
    fallback by ``_is_spa_response``) is reported as a HIGH / tentative finding,
    after which the next path is tested.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the target (a trailing slash is ignored).
        result: Scan result the findings are appended to.
    """
    base = url.rstrip("/")
    test_paths = ["/admin", "/admin/", "/dashboard", "/panel", "/internal"]

    for path in test_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=8, allow_redirects=False)
            if resp.status_code != 403:
                continue

            bare = path.strip("/")
            # (label, url, extra headers); url=None marks the method-switch probe.
            bypasses = [
                ("Path: /./", f"{base}/{bare}/./", None),
                ("Path: //", f"{base}//{bare}", None),
                ("Path: ..;/", f"{base}/..;{path}", None),
                # Encoded dot segment in front of the path. The previous payload
                # never contained %2e and simply re-requested the plain path.
                # NOTE(review): the HTTP client may normalise %2e / dot segments
                # before sending — confirm on the wire if this probe matters.
                ("Path: %2e/", f"{base}/%2e/{bare}", None),
                ("Path: .json", f"{base}{path}.json", None),
                ("Path: ;", f"{base}{path};", None),
                ("Path: ..%00/", f"{base}{path}..%00/", None),
                ("Path: %20/", f"{base}{path}%20/", None),
                ("Header: X-Original-URL", f"{base}/", {"X-Original-URL": path}),
                ("Header: X-Rewrite-URL", f"{base}/", {"X-Rewrite-URL": path}),
                ("Header: X-Forwarded-For: 127.0.0.1", f"{base}{path}", {"X-Forwarded-For": "127.0.0.1"}),
                # Label now names the header that is actually sent.
                ("Header: X-Custom-IP-Authorization: 127.0.0.1", f"{base}{path}",
                 {"X-Custom-IP-Authorization": "127.0.0.1"}),
                ("Header: X-Real-IP: 127.0.0.1", f"{base}{path}", {"X-Real-IP": "127.0.0.1"}),
                ("Method: POST", None, None),  # POST instead of GET
            ]

            for name, bypass_url, headers in bypasses:
                try:
                    if name.startswith("Method:"):
                        r = session.post(f"{base}{path}", timeout=5, allow_redirects=False)
                    else:
                        r = session.get(bypass_url, headers=headers or {}, timeout=5,
                                        allow_redirects=False)
                    if r.status_code == 200 and len(r.content) > 100:
                        # A catch-all SPA page is not evidence of a bypass.
                        if _is_spa_response(r, result):
                            continue
                        result.add(Finding("403 Bypass",
                            f"403 bypass on {path} ({name})", Severity.HIGH,
                            f"Path {path} returns 403, but is bypassed via {name}.",
                            f"Fix access control rules for {path}.",
                            confidence=Confidence.TENTATIVE))
                        # One finding per path is enough.
                        break
                except requests.RequestException:
                    pass
        except requests.RequestException:
            pass




def check_hpp(session: requests.Session, url: str, result: ScanResult):
    """Heuristic check for HTTP Parameter Pollution (HPP).

    Requests the site root with ``page`` given once, twice and three times and
    reports a LOW / tentative finding when the body changes with every added
    duplicate. A control request (the single-parameter URL fetched twice)
    filters out pages whose body differs on every load anyway, and all
    responses must be 200 so that error pages are not compared.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the target (a trailing slash is ignored).
        result: Scan result the finding is appended to.
    """
    base = url.rstrip("/")
    try:
        resp1 = session.get(f"{base}/?page=1", timeout=8)
        control = session.get(f"{base}/?page=1", timeout=8)
        resp2 = session.get(f"{base}/?page=1&page=2", timeout=8)
        resp3 = session.get(f"{base}/?page=1&page=2&page=3", timeout=8)
    except requests.RequestException:
        return

    if any(r.status_code != 200 for r in (resp1, control, resp2, resp3)):
        return

    # Identical requests already produce different bodies (tokens, timestamps,
    # rotating content): a body diff says nothing about duplicate parameters.
    if resp1.text != control.text:
        return

    # If content differs — parameters are processed
    if resp1.text != resp2.text and resp2.text != resp3.text:
        result.add(Finding("HPP", "HTTP Parameter Pollution possible", Severity.LOW,
            "Server handles duplicate parameters differently.",
            "Ensure duplicate parameter handling is safe.",
            confidence=Confidence.TENTATIVE))




def check_jsonp(session: requests.Session, url: str, result: ScanResult):
    """Probe common API paths for a JSONP callback parameter.

    Every combination of candidate path and callback parameter name is
    requested with the callback set to ``qscan_test_cb``. The first 200
    response whose body contains ``qscan_test_cb(`` is reported as a MEDIUM
    finding and the check stops.
    """
    root = url.rstrip("/")
    # Paths vary slowest, parameter names fastest.
    probes = [
        (endpoint, arg)
        for endpoint in ("/api/", "/api/v1/", "/api/v2/", "/json/", "/data/", "/ajax/", "/")
        for arg in ("callback", "jsonp", "cb", "jsonpcallback", "func")
    ]

    for endpoint, arg in probes:
        try:
            reply = session.get(f"{root}{endpoint}?{arg}=qscan_test_cb", timeout=8)
            wrapped = "qscan_test_cb(" in reply.text if reply.status_code == 200 else False
            if not wrapped:
                continue
            result.add(Finding("JSONP", f"JSONP endpoint: {endpoint}?{arg}=", Severity.MEDIUM,
                f"JSONP endpoint found: {endpoint}?{arg}= — data leak via CSRF possible.",
                "Replace JSONP with CORS. If JSONP is needed, restrict domains.",
                confidence=Confidence.FIRM))
            return
        except requests.RequestException:
            # Unreachable endpoint: move on to the next combination.
            continue




def check_jwt(session: requests.Session, url: str, result: ScanResult):
    """Find JWTs in the response to ``url`` and flag weak token properties.

    Cookies, response headers and the body (first three matches) are searched
    for JWT-shaped strings. Each distinct token is analysed once, and at most
    five tokens are analysed. Reported issues: ``alg`` of ``none`` or missing,
    HMAC algorithms, missing / expired / very long ``exp``, and sensitive-looking
    claim names in the payload.

    Header findings are reported even when the payload cannot be decoded, and
    malformed tokens are skipped instead of relying on a blanket exception
    handler.

    Args:
        session: HTTP session used for the request.
        url: URL to fetch.
        result: Scan result the findings are appended to.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    jwt_pattern = r'eyJ[A-Za-z0-9_\-]*\.eyJ[A-Za-z0-9_\-]*\.[A-Za-z0-9_\-]*'

    def decode_segment(segment):
        """Return the JSON object encoded in a base64url segment, or None."""
        # JWTs strip base64 padding; restore exactly the missing amount.
        padded = segment + "=" * (-len(segment) % 4)
        try:
            decoded = json.loads(base64.urlsafe_b64decode(padded))
        except ValueError:
            # binascii.Error, JSONDecodeError and UnicodeDecodeError all derive
            # from ValueError: the segment is not a valid JSON document.
            return None
        return decoded if isinstance(decoded, dict) else None

    locations = []
    seen_tokens = set()

    def remember(location, token):
        """Record a token once; the first location it was seen in wins."""
        if token not in seen_tokens:
            seen_tokens.add(token)
            locations.append((location, token))

    # Cookies first: a cookie JWT also appears in the Set-Cookie header, and
    # the cookie name is the more useful location to report.
    for cookie in resp.cookies:
        for m in re.findall(jwt_pattern, str(cookie.value)):
            remember(f"Cookie: {cookie.name}", m)

    for hdr, val in resp.headers.items():
        for m in re.findall(jwt_pattern, val):
            remember(f"Header: {hdr}", m)

    for m in re.findall(jwt_pattern, resp.text)[:3]:
        remember("Body", m)

    sensitive_keys = {"password", "passwd", "secret", "ssn", "credit_card",
                      "card_number", "cvv", "pin"}

    for location, token in locations[:5]:
        parts = token.split(".")
        header = decode_segment(parts[0])
        if header is None:
            # Not a decodable JOSE header — most likely not a JWT at all.
            continue

        alg = header.get("alg", "")
        if not isinstance(alg, str):
            alg = ""
        typ = header.get("typ", "")

        issues = []
        if alg.lower() == "none":
            issues.append(("alg=none — unsigned!", Severity.CRITICAL))
        elif alg in ("HS256", "HS384", "HS512"):
            issues.append((f"algorithm {alg} (HMAC) — secret brute-force possible", Severity.LOW))
        if not alg:
            issues.append(("algorithm not specified", Severity.HIGH))

        payload = decode_segment(parts[1])
        if payload is not None:
            exp = payload.get("exp")
            if exp is None:
                issues.append(("no exp field (never expires)", Severity.MEDIUM))
            elif isinstance(exp, (int, float)) and not isinstance(exp, bool):
                now = time.time()
                if exp < now:
                    issues.append(("token expired", Severity.LOW))
                elif exp - now > 86400 * 30:
                    issues.append(("token valid for > 30 days", Severity.LOW))
            # A non-numeric exp is malformed; no lifetime conclusion is drawn.

            found_sensitive = [k for k in payload if k.lower() in sensitive_keys]
            if found_sensitive:
                issues.append((f"sensitive data: {', '.join(found_sensitive)}", Severity.HIGH))

        for desc, sev in issues:
            result.add(Finding("JWT", f"JWT ({location}): {desc}", sev,
                f"JWT in {location}: {desc}. Header: alg={alg}, typ={typ}.",
                "Use RS256/ES256, set a reasonable exp, do not store sensitive data in JWT.",
                confidence=Confidence.FIRM))




def check_git_exposure(session: requests.Session, url: str, result: ScanResult):
    """Check whether files of a ``.git`` directory are downloadable.

    A fixed list of well-known ``.git`` files is requested without following
    redirects. A file counts as exposed when it returns 200 with a non-empty
    body that is not an HTML document (catch-all sites answer 200 with their
    index page for any path, which would otherwise look like an exposed
    repository). With two or more exposed files a CRITICAL finding is added;
    if ``.git/config`` is among them and contains a remote URL, that URL is
    reported as a separate HIGH finding.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the target (a trailing slash is ignored).
        result: Scan result the findings are appended to.
    """
    base = url.rstrip("/")
    git_files = [
        (".git/HEAD", "HEAD reference"),
        (".git/config", "Git config (remote URL)"),
        (".git/description", "Repository description"),
        (".git/info/refs", "Branch references"),
        (".git/packed-refs", "Packed refs"),
        (".git/objects/info/packs", "Pack files"),
        (".git/logs/HEAD", "Reflog"),
        (".git/COMMIT_EDITMSG", "Last commit message"),
        (".git/info/exclude", "Git exclude"),
    ]
    # Enough of each file is kept to reach the [remote] section of a typical
    # config; a 200-character cut regularly ended before the "url =" line.
    snippet_limit = 4096

    exposed_files = []
    for path, desc in git_files:
        try:
            resp = session.get(f"{base}/{path}", timeout=5, allow_redirects=False)
        except requests.RequestException:
            continue
        if resp.status_code != 200 or len(resp.content) == 0:
            continue
        content = resp.text
        # Git metadata files are plain text; an HTML page here is a soft-404 /
        # catch-all response, not repository content.
        if content[:256].lstrip().lower().startswith(("<!doctype", "<html")):
            continue
        exposed_files.append((path, desc, content[:snippet_limit]))

    if len(exposed_files) >= 2:
        details = "; ".join(f"{p} ({d})" for p, d, _ in exposed_files[:5])
        accessible_paths = ", ".join(p for p, _, _ in exposed_files)
        result.add(Finding("Git Exposure", f".git repository exposed ({len(exposed_files)} files)",
            Severity.CRITICAL,
            f"Git repository exposed: {details}. "
            "Full source code cloning may be possible!",
            "Immediately restrict access to .git/. In nginx: location ~ /\\.git { deny all; }",
            evidence=f"Accessible: {accessible_paths[:200]}",
            confidence=Confidence.FIRM))

        # Check config for remote URL
        for path, desc, content in exposed_files:
            if path == ".git/config" and "url = " in content:
                remote = re.search(r'url\s*=\s*(.+)', content)
                if remote:
                    remote_url = remote.group(1).strip()
                    result.add(Finding("Git Exposure", f"Remote URL: {remote_url}",
                        Severity.HIGH,
                        f"Git remote: {remote_url}.",
                        "Ensure the remote repository is private.",
                        evidence=f".git/config: url = {remote_url[:150]}",
                        confidence=Confidence.FIRM))




def check_technology(session: requests.Session, url: str, result: ScanResult):
    """Fingerprint the technology stack from headers, HTML and cookie names.

    Three signature sets are applied to the response for ``url``: substrings
    of the ``X-Powered-By`` / ``Server`` headers, case-insensitive regexes over
    the body, and substrings of cookie names. All distinct matches are
    reported in a single INFO finding; nothing is added when no signature
    matches or the request fails.

    Args:
        session: HTTP session used for the request.
        url: Page to fingerprint.
        result: Scan result the finding is appended to.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    body = resp.text
    headers = {k.lower(): v for k, v in resp.headers.items()}
    technologies = []
    header_sigs = {
        "x-powered-by": {
            "php": "PHP", "asp.net": "ASP.NET", "express": "Express.js",
            "next.js": "Next.js", "nuxt": "Nuxt.js", "django": "Django",
            "flask": "Flask", "ruby": "Ruby", "servlet": "Java Servlet",
        },
        "server": {
            "nginx": "nginx", "apache": "Apache", "iis": "IIS",
            "caddy": "Caddy", "litespeed": "LiteSpeed",
            "openresty": "OpenResty", "cloudflare": "Cloudflare",
            "gunicorn": "Gunicorn", "uvicorn": "Uvicorn",
            "daphne": "Daphne", "cowboy": "Cowboy (Erlang)",
            "tengine": "Tengine",
        },
    }
    for hdr, sigs in header_sigs.items():
        val = headers.get(hdr, "").lower()
        for sig, name in sigs.items():
            # Same duplicate guard as the HTML and cookie stages below.
            if sig in val and name not in technologies:
                technologies.append(name)
    html_sigs = [
        (r'wp-content|wp-includes', "WordPress"),
        (r'Joomla!|/media/jui/', "Joomla"),
        (r'Drupal\.settings|drupal\.js', "Drupal"),
        (r'/bitrix/', "1C-Bitrix"),
        (r'react\.development|react\.production|__NEXT_DATA__', "React"),
        (r'ng-app|ng-controller|angular\.module', "AngularJS"),
        (r'vue\.runtime|__vue__|v-bind:|v-on:|v-if', "Vue.js"),
        (r'svelte', "Svelte"),
        (r'ember\.js|data-ember', "Ember.js"),
        (r'jquery[\./]', "jQuery"),
        (r'bootstrap[\./]|class="[^"]*\bcontainer\b[^"]*".*class="[^"]*\brow\b', "Bootstrap"),
        (r'tailwindcss|class="[^"]*\bflex\b[^"]*\bitems-center\b', "Tailwind CSS"),
        (r'materialize\.css|materialize\.min\.css', "Materialize"),
        (r'bulma[\./]|class="[^"]*\bcolumns\b[^"]*\bcolumn\b', "Bulma"),
        (r'_next/static|__NEXT_DATA__', "Next.js"),
        (r'/_nuxt/', "Nuxt.js"),
        (r'gatsby', "Gatsby"),
        (r'doctype\s+html.*\bamp\b|cdn\.ampproject\.org', "AMP"),
        (r'shopify\.com|Shopify\.theme', "Shopify"),
        (r'wix\.com|wixstatic\.com', "Wix"),
        (r'squarespace\.com|squarespace-cdn', "Squarespace"),
        (r'recaptcha/api|g-recaptcha', "Google reCAPTCHA"),
        (r'hcaptcha\.com', "hCaptcha"),
        (r'cloudflare\.com/ajax|cf-turnstile', "Cloudflare (JS)"),
        # \b keeps "ga(" from matching inside longer identifiers (e.g. "omega(").
        (r'google-analytics\.com|gtag\(|\bga\(', "Google Analytics"),
        (r'googletagmanager\.com', "Google Tag Manager"),
        (r'facebook\.net/|fbq\(', "Facebook Pixel"),
        # \b keeps "ym(" from matching inside longer identifiers (e.g. "gym(").
        (r'mc\.yandex\.ru/metrika|\bym\(', "Yandex Metrika"),
        (r'socket\.io', "Socket.IO"),
        (r'webpack', "Webpack"),
        # Whole word only: a bare substring also matched "invite", "invited", ...
        (r'\bvite\b', "Vite"),
    ]

    for pattern, name in html_sigs:
        if re.search(pattern, body, re.IGNORECASE) and name not in technologies:
            technologies.append(name)
    cookie_sigs = {
        "phpsessid": "PHP", "jsessionid": "Java", "asp.net_sessionid": "ASP.NET",
        "_csrf": "CSRF framework", "laravel_session": "Laravel",
        "_rails": "Ruby on Rails", "connect.sid": "Express.js",
        "django": "Django", "flask": "Flask",
    }
    for cookie in resp.cookies:
        for sig, name in cookie_sigs.items():
            if sig in cookie.name.lower() and name not in technologies:
                technologies.append(name)

    if technologies:
        result.add(Finding("Technologies", f"Detected: {', '.join(technologies)}",
            Severity.INFO, f"Technology stack: {', '.join(technologies)}.",
            confidence=Confidence.CONFIRMED))




# Built-in database of known CVEs for common software.
#
# Maps a lowercase product name to a list of
# (max_affected_version, severity, cve_id, description) tuples:
#   max_affected_version - check_cve() treats a detected version as affected
#                          when it equals this value or _version_lt() reports
#                          it as lower.
#   severity             - name of a Severity member; check_cve() resolves it
#                          with Severity[...].
#   cve_id, description  - copied into the resulting finding.
# Within each product the entries are listed from the highest
# max_affected_version down to the lowest.
_CVE_DATABASE = {
    "nginx": [
        ("1.27.0", "MEDIUM", "CVE-2025-23419", "TLS session reuse bypass via SNI mismatch"),
        ("1.25.3", "HIGH", "CVE-2024-7347", "Vulnerability in ngx_http_mp4_module when processing MP4 files"),
        ("1.23.3", "MEDIUM", "CVE-2023-44487", "HTTP/2 Rapid Reset DoS"),
        ("1.21.5", "HIGH", "CVE-2022-41741", "Memory corruption in ngx_http_mp4_module"),
        ("1.17.2", "MEDIUM", "CVE-2019-9511", "HTTP/2 Data Dribble DoS"),
    ],
    "apache": [
        ("2.4.61", "HIGH", "CVE-2024-40725", "Source code disclosure via HTTP requests to CGI"),
        ("2.4.58", "HIGH", "CVE-2024-27316", "HTTP/2 DoS via memory leak in header processing"),
        ("2.4.55", "CRITICAL", "CVE-2023-25690", "HTTP Request Smuggling via mod_proxy"),
        ("2.4.51", "CRITICAL", "CVE-2021-44790", "Buffer overflow in mod_lua"),
        ("2.4.49", "CRITICAL", "CVE-2021-41773", "Path Traversal — arbitrary file read"),
    ],
    "openssl": [
        ("3.1.3", "MEDIUM", "CVE-2023-5678", "DoS when processing DH parameters"),
        ("3.0.10", "HIGH", "CVE-2023-3817", "DoS when checking DH keys"),
        ("1.1.1u", "HIGH", "CVE-2023-2650", "DoS when processing ASN.1 identifiers"),
        ("1.1.1k", "HIGH", "CVE-2021-3449", "NULL pointer dereference — crash during TLS handshake"),
        ("1.0.2u", "CRITICAL", "CVE-2020-1967", "Segfault when processing signature_algorithms (DoS)"),
    ],
    "openssh": [
        ("9.7", "CRITICAL", "CVE-2024-6387", "regreSSHion — unauthenticated RCE"),
        ("9.5", "HIGH", "CVE-2023-51385", "Command injection via hostnames in ProxyCommand"),
        ("8.9", "MEDIUM", "CVE-2023-38408", "RCE via ssh-agent forwarding"),
    ],
    "php": [
        ("8.3.7", "CRITICAL", "CVE-2024-4577", "CGI argument injection — RCE"),
        ("8.2.18", "HIGH", "CVE-2024-2756", "Cookie protection bypass via __Host-/__Secure- prefix"),
        ("8.1.27", "HIGH", "CVE-2024-2757", "DoS via mb_encode_mimeheader (infinite loop)"),
        ("8.0.30", "HIGH", "CVE-2023-3247", "Info leak via SOAP WSDL cache"),
        ("7.4.33", "CRITICAL", "CVE-2022-31626", "Buffer overflow in MySQL PDO driver"),
    ],
    "iis": [
        ("10.0", "HIGH", "CVE-2022-21907", "HTTP Protocol Stack RCE"),
        ("10.0", "HIGH", "CVE-2021-31166", "HTTP.sys RCE (wormable)"),
    ],
    "express": [
        ("4.19.1", "MEDIUM", "CVE-2024-29041", "Open Redirect via URL parsing"),
        ("4.17.3", "HIGH", "CVE-2022-24999", "Prototype Pollution via qs (query string)"),
    ],
    "tomcat": [
        ("10.1.24", "HIGH", "CVE-2024-34750", "DoS via HTTP/2 header processing"),
        ("9.0.85", "HIGH", "CVE-2024-23672", "DoS via WebSocket connections"),
        ("9.0.71", "HIGH", "CVE-2023-28708", "Cookie without Secure flag on HTTPS redirect"),
        ("8.5.85", "CRITICAL", "CVE-2023-28709", "DoS when processing HTTP Trailer headers"),
    ],
    "django": [
        ("5.0.2", "MEDIUM", "CVE-2024-24680", "DoS via intcomma template filter"),
        ("4.2.10", "HIGH", "CVE-2024-27351", "ReDoS via regex in django.utils.text.Truncator"),
        ("4.1.13", "HIGH", "CVE-2023-46695", "DoS via long strings in django.utils.text.Truncator"),
    ],
    "flask": [
        ("2.3.1", "HIGH", "CVE-2023-30861", "Cookie leak between sessions when proxy caching"),
    ],
    "rails": [
        ("7.0.8", "HIGH", "CVE-2023-44487", "HTTP/2 Rapid Reset DoS via Puma"),
        ("7.0.4", "CRITICAL", "CVE-2023-22795", "ReDoS in Action Dispatch"),
        ("6.1.7", "CRITICAL", "CVE-2023-22794", "SQL injection via Sanitize"),
    ],
    "wordpress": [
        ("6.4.2", "CRITICAL", "CVE-2024-31210", "RCE via POP chain"),
        ("6.3.1", "HIGH", "CVE-2023-39999", "Sensitive data disclosure via REST API"),
        ("6.1.1", "CRITICAL", "CVE-2023-22622", "SSRF via pingback and DNS rebinding"),
    ],
    # NOTE: jquery CVEs moved to check_js_framework_cve() to avoid double-counting
    "litespeed": [
        ("6.1", "HIGH", "CVE-2024-4978", "RCE via malicious .htaccess parsing"),
        ("5.4.12", "HIGH", "CVE-2023-28731", "Path traversal via URI encoding"),
    ],
    "envoy": [
        ("1.29.1", "HIGH", "CVE-2024-23326", "HTTP/2 stream crash via RST_STREAM flood"),
        ("1.27.2", "HIGH", "CVE-2023-44487", "HTTP/2 Rapid Reset DoS"),
    ],
    "varnish": [
        ("7.4.2", "HIGH", "CVE-2024-30156", "HTTP/2 request smuggling via crafted headers"),
        ("7.3.0", "MEDIUM", "CVE-2023-44487", "HTTP/2 Rapid Reset DoS"),
    ],
    "next.js": [
        ("14.1.0", "HIGH", "CVE-2024-34351", "SSRF via Server Actions redirect"),
        ("13.4.5", "MEDIUM", "CVE-2023-46298", "DoS via large request body"),
    ],
    "node.js": [
        ("21.6.1", "HIGH", "CVE-2024-22019", "DoS via HTTP chunked encoding"),
        ("20.11.0", "HIGH", "CVE-2024-21892", "Privilege escalation via Linux capabilities"),
        ("18.19.0", "HIGH", "CVE-2023-44487", "HTTP/2 Rapid Reset DoS"),
    ],
    "gunicorn": [
        ("22.0.0", "HIGH", "CVE-2024-1135", "HTTP Request Smuggling via Transfer-Encoding"),
    ],
    "spring": [
        ("6.1.5", "CRITICAL", "CVE-2024-22262", "URL parsing SSRF bypass"),
        ("5.3.32", "CRITICAL", "CVE-2024-22243", "URL parsing bypass via userinfo"),
        ("5.3.17", "CRITICAL", "CVE-2022-22965", "Spring4Shell — RCE via data binding"),
    ],
}


def _extract_versions(result: ScanResult, session: requests.Session,
                      url: str) -> list:
    """Collect (product, version) pairs from earlier findings and the page HTML.

    Two sources are used: findings already stored in ``result`` whose title
    contains "Leaks:" (a product/version token is parsed out of the title),
    and the WordPress generator meta tag of the page at ``url``.

    Returns:
        A list of ``(product, version)`` tuples; product names are lowercase.
    """
    # Known product name, then "/" or whitespace, then a version token.
    product_re = re.compile(
        r'(?i)(nginx|apache|openssl|openssh|php|iis|express|'
        r'tomcat|django|flask|rails|wordpress|jquery|'
        r'litespeed|envoy|varnish|next\.js|node\.js|'
        r'gunicorn|spring)'
        r'[/\s]+(\d[\d.a-z]*)')

    pairs = []
    for finding in result.findings:
        if "Leaks:" not in finding.title:
            continue
        # Keep what follows the second colon, e.g. "nginx/1.25.0".
        leaked = finding.title.split(":", 2)[-1].strip()
        hit = product_re.search(leaked)
        if hit is None:
            continue
        name = hit.group(1).lower()
        # The single character right before the match identifies derivative
        # products (KPHP is not PHP, HHVM is not PHP).
        preceding = leaked[max(0, hit.start() - 1):hit.start()]
        if name == "php" and preceding.lower() in ("k", "h"):
            continue
        pairs.append((name, hit.group(2)))

    # From HTML (jQuery CVEs moved to check_js_framework_cve)
    try:
        page = session.get(url, timeout=10)
        # WordPress generator meta tag: <meta name="generator" content="WordPress 6.3.1">
        generator = re.search(r'<meta[^>]+content="WordPress\s+([\d.]+)"', page.text)
        if generator:
            pairs.append(("wordpress", generator.group(1)))
    except requests.RequestException:
        pass

    return pairs


def check_cve(session: requests.Session, url: str, result: ScanResult):
    """Check detected software versions against the built-in CVE table.

    For every product returned by ``_extract_versions`` (first occurrence
    only), all ``_CVE_DATABASE`` entries whose ``max_affected_version`` is
    greater than or equal to the detected version are collected and the most
    severe one is reported. On equal severity the entry listed first wins.

    Args:
        session: HTTP session, passed through to ``_extract_versions``.
        url: Target URL, passed through to ``_extract_versions``.
        result: Scan result that is read for versions and receives findings.
    """
    versions = _extract_versions(result, session, url)

    # Ranking for the severity names stored in _CVE_DATABASE.
    severity_rank = {"INFO": 0, "LOW": 1, "MEDIUM": 2, "HIGH": 3, "CRITICAL": 4}

    seen = set()
    for product, version in versions:
        if product in seen:
            continue
        applicable = [
            entry for entry in _CVE_DATABASE.get(product, [])
            if _version_lt(version, entry[0]) or version == entry[0]
        ]
        if not applicable:
            continue
        # One (most severe) CVE per product. Taking the first applicable entry
        # would pick the newest CVE, which is not always the most severe one.
        # max() keeps the earliest entry on ties.
        max_ver, sev_str, cve_id, desc = max(
            applicable, key=lambda entry: severity_rank.get(entry[1], -1))
        result.add(Finding(
            "Known CVEs",
            f"{cve_id}: {product} {version}",
            Severity[sev_str],
            f"{desc}. Affected versions up to {max_ver}.",
            f"Update {product} to the latest version.",
            evidence=f"Detected via: {product}/{version} in HTTP headers",
            confidence=Confidence.FIRM
        ))
        seen.add(product)




def check_cache_poisoning(session: requests.Session, url: str, result: ScanResult):
    """Probe for web cache poisoning via commonly unkeyed request headers.

    A clean baseline response is fetched first. Each candidate header is then
    sent with a marker value and two effects are looked for:

    * the marker shows up in the ``Location`` header of the immediate response
      (redirect poisoning), or
    * the marker is reflected in a 200 response body although it is absent
      from the baseline body.

    Both are reported as HIGH / tentative findings. The probe is sent without
    following redirects so that a poisoned ``Location`` can be inspected; if
    the redirect were followed, the request to the marker host would fail and
    the finding would be lost.

    Args:
        session: HTTP session used for all requests.
        url: URL to probe.
        result: Scan result the findings are appended to.
    """
    # NOTE(review): the probes hit ``url`` itself with no cache-buster
    # parameter; on a caching target a reflected response could be stored for
    # real visitors — consider adding a unique query parameter.
    unkeyed_headers = [
        ("X-Forwarded-Host", "evil.qscan.test"),
        ("X-Forwarded-Scheme", "nothttps"),
        ("X-Forwarded-Port", "1234"),
        ("X-Original-URL", "/qscan-cache-test"),
        ("X-Rewrite-URL", "/qscan-cache-test"),
        ("X-Host", "evil.qscan.test"),
    ]

    try:
        resp_base = session.get(url, timeout=10)
    except requests.RequestException:
        return
    base_body = resp_base.text
    # Location of the first hop of the clean request — the counterpart of the
    # unfollowed probe response below.
    first_hop = resp_base.history[0] if resp_base.history else resp_base
    base_location = first_hop.headers.get("Location", "")

    for header, value in unkeyed_headers:
        try:
            probe = session.get(url, headers={header: value}, timeout=10,
                                allow_redirects=False)

            # Check for differences in response headers
            location = probe.headers.get("Location", "")
            if location != base_location and value in location:
                result.add(Finding("Cache Poisoning",
                    f"Redirect poisoning: {header}", Severity.HIGH,
                    f"{header} header affects Location redirect.",
                    f"Do not trust {header} header for redirect generation.",
                    confidence=Confidence.TENTATIVE))
                # Following this redirect would lead to the marker host.
                continue

            # An ordinary redirect: follow it to reach the page body, as the
            # baseline request did.
            if 300 <= probe.status_code < 400:
                probe = session.get(url, headers={header: value}, timeout=10)

            # Check if our input is reflected
            if (probe.status_code == 200 and value in probe.text
                    and value not in base_body):
                result.add(Finding("Cache Poisoning",
                    f"Unkeyed header reflected: {header}", Severity.HIGH,
                    f"Header {header}: {value} is reflected in response. "
                    "If the response is cached, cache poisoning is possible.",
                    f"Do not use {header} header in response generation, or add it to the cache key.",
                    confidence=Confidence.TENTATIVE))
        except requests.RequestException:
            pass




def check_session_fixation(session: requests.Session, url: str, result: ScanResult):
    """Test whether a session cookie can be fixed through a URL parameter.

    The first candidate login path that answers 200 is examined: for every
    session-like cookie it sets, the page is requested again with
    ``?<cookie>=fixated_session_value``. If the response sets that cookie to
    exactly this value, a HIGH / tentative finding is added. No further login
    paths are tried after the first 200.
    """
    root = url.rstrip("/")
    session_markers = ["session", "sid", "sess", "phpsessid", "jsessionid"]

    for login_path in ("/login", "/signin", "/auth/login", "/user/login", "/account/login"):
        try:
            page = session.get(f"{root}{login_path}", timeout=8)
            if page.status_code != 200:
                continue

            # Distinct cookie names set before login, in first-seen order,
            # narrowed to those that look like session identifiers.
            cookie_names = list(dict.fromkeys(c.name for c in page.cookies))
            candidates = [
                name for name in cookie_names
                if any(marker in name.lower() for marker in session_markers)
            ]

            # Try to set session via URL
            for name in candidates:
                try:
                    probe = session.get(
                        f"{root}{login_path}?{name}=fixated_session_value",
                        timeout=8
                    )
                    for c in probe.cookies:
                        if c.name != name or c.value != "fixated_session_value":
                            continue
                        result.add(Finding("Session Fixation",
                            f"Session fixation via URL: {name}", Severity.HIGH,
                            f"Session cookie {name} can be set via URL parameter.",
                            "Generate a new session ID after authentication.",
                            confidence=Confidence.TENTATIVE))
                except requests.RequestException:
                    pass
            # Only the first reachable login page is tested.
            break
        except requests.RequestException:
            pass




def check_verb_tampering(session: requests.Session, url: str, result: ScanResult):
    """Check for authorization bypass via HTTP verb tampering.

    For each candidate path whose plain GET is answered with 401, 403 or 302,
    the same path is requested with POST, PUT, PATCH, HEAD and OPTIONS. The
    first method that returns 200 with more than 100 bytes of content is
    reported as a HIGH / tentative finding. For HEAD, which never carries a
    body, the declared ``Content-Length`` is used as the size.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the target (a trailing slash is ignored).
        result: Scan result the findings are appended to.
    """
    base = url.rstrip("/")
    protected_paths = ["/admin", "/dashboard", "/panel", "/api/admin",
                       "/manage", "/internal", "/settings"]

    for path in protected_paths:
        try:
            get_resp = session.get(f"{base}{path}", timeout=5, allow_redirects=False)
        except requests.RequestException:
            continue
        if get_resp.status_code not in (401, 403, 302):
            continue

        for method in ["POST", "PUT", "PATCH", "HEAD", "OPTIONS"]:
            try:
                resp = session.request(method, f"{base}{path}", timeout=5, allow_redirects=False)
            except requests.RequestException:
                continue
            if resp.status_code != 200:
                continue

            if method == "HEAD":
                # A HEAD response has no body, so len(resp.content) is always 0;
                # fall back to the size the server declares.
                try:
                    body_size = int(resp.headers.get("Content-Length", 0))
                except ValueError:
                    body_size = 0
            else:
                body_size = len(resp.content)

            if body_size > 100:
                result.add(Finding("Verb Tampering",
                    f"Auth bypass on {path} via {method}", Severity.HIGH,
                    f"GET on {path} returns {get_resp.status_code}, "
                    f"but {method} returns 200.",
                    f"Protect {path} for all HTTP methods.",
                    confidence=Confidence.TENTATIVE))
                # One finding per path is enough.
                break




def check_cloud_storage(session: requests.Session, url: str, result: ScanResult):
    """Detect cloud storage buckets referenced by the page and probe their exposure.

    The page at ``url`` is fetched and searched for AWS S3, Google Cloud Storage
    and Azure Blob Storage URLs. Every distinct bucket is requested once: a
    response containing an XML listing is reported as HIGH, any other 200 as LOW.

    Args:
        session: HTTP session used for all requests.
        url: Page whose body is searched for bucket references.
        result: Scan result that receives the findings.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    body = resp.text

    # (pattern, provider, path_style). In path-style URLs the bucket name is the
    # first path segment, so the host alone addresses the provider, not the bucket.
    patterns = [
        (r'(https?://[\w\-]+\.s3[\.\-][\w\-]+\.amazonaws\.com[^\s"\'<>]*)', "AWS S3", False),
        (r'(https?://s3[\.\-][\w\-]+\.amazonaws\.com/[\w\-]+[^\s"\'<>]*)', "AWS S3", True),
        (r'(https?://[\w\-]+\.s3\.amazonaws\.com[^\s"\'<>]*)', "AWS S3", False),
        (r'(https?://storage\.googleapis\.com/[\w\-]+[^\s"\'<>]*)', "Google Cloud Storage", True),
        (r'(https?://[\w\-]+\.storage\.googleapis\.com[^\s"\'<>]*)', "Google Cloud Storage", False),
        (r'(https?://[\w\-]+\.blob\.core\.windows\.net[^\s"\'<>]*)', "Azure Blob Storage", False),
    ]

    probed_buckets = set()
    for pattern, cloud, path_style in patterns:
        for bucket_url in re.findall(pattern, body):
            # Every pattern starts with a scheme and host, so this always matches.
            location = re.match(r'(https?://[^/?#]+)(?:/([^/?#]+))?', bucket_url)
            bucket_base = location.group(1)
            if path_style and location.group(2):
                bucket_base = f"{bucket_base}/{location.group(2)}"

            # De-duplicate per bucket rather than per object URL, otherwise a
            # page with many assets in one bucket yields one finding per asset.
            if bucket_base in probed_buckets:
                continue
            probed_buckets.add(bucket_base)

            try:
                r = session.get(bucket_base, timeout=5)
            except requests.RequestException:
                continue
            if r.status_code != 200:
                continue

            if "<ListBucket" in r.text or "<EnumerationResults" in r.text:
                result.add(Finding("Cloud Storage",
                    f"{cloud} bucket listing open", Severity.HIGH,
                    f"Bucket {bucket_base} allows file listing.",
                    "Restrict public bucket listing access.",
                    confidence=Confidence.FIRM))
            else:
                result.add(Finding("Cloud Storage",
                    f"{cloud} bucket public: {bucket_base}", Severity.LOW,
                    f"Bucket {bucket_base} is publicly accessible.",
                    "Verify public access is intentional.",
                    confidence=Confidence.FIRM))




def check_wp_plugins(session: requests.Session, url: str, result: ScanResult):
    """Enumerate WordPress plugins and the active theme.

    The site is treated as WordPress when ``/wp-login.php`` answers 200 or 302.
    A fixed list of popular plugin slugs is then probed concurrently through
    ``readme.txt`` (which also yields the version) and, failing that, through
    the plugin directory itself. Finally the front page is searched for a theme
    reference and the theme's ``style.css`` is read for its version.

    Args:
        session: HTTP session used for all requests (shared by worker threads).
        url: Base URL of the site.
        result: Scan result that receives the findings.
    """
    base = url.rstrip("/")

    # Verify this is WordPress
    try:
        resp = session.get(f"{base}/wp-login.php", timeout=8, allow_redirects=False)
        if resp.status_code not in (200, 302):
            return
    except requests.RequestException:
        return

    popular_plugins = [
        "contact-form-7", "woocommerce", "akismet", "jetpack",
        "yoast-seo", "wordfence", "elementor", "classic-editor",
        "wpforms-lite", "updraftplus", "really-simple-ssl",
        "all-in-one-seo-pack", "wp-super-cache", "w3-total-cache",
        "redirection", "duplicate-post", "wp-mail-smtp",
        "advanced-custom-fields", "tinymce-advanced", "tablepress",
        "better-wp-security", "sucuri-scanner", "ithemes-security",
        "google-analytics-for-wordpress", "google-sitemap-generator",
        "wp-optimize", "autoptimize", "wp-fastest-cache",
        "regenerate-thumbnails", "cookie-notice", "gdpr-cookie-consent",
        "loginizer", "limit-login-attempts-reloaded",
        "user-role-editor", "members", "bbpress", "buddypress",
        "easy-digital-downloads", "gravity-forms", "ninja-forms",
        "formidable", "wp-file-manager", "file-manager",
    ]

    # Control probe against a slug that cannot exist. Sites that answer 200 for
    # every path (soft 404) or 403 for the whole plugins tree (WAF) would
    # otherwise make every plugin in the list look installed.
    readme_status_reliable = True
    directory_status_reliable = True
    control_base = f"{base}/wp-content/plugins/qscan-nonexistent-plugin"
    try:
        control_readme = session.get(f"{control_base}/readme.txt",
                                     timeout=5, allow_redirects=False)
        readme_status_reliable = control_readme.status_code != 200
        control_dir = session.get(f"{control_base}/",
                                  timeout=5, allow_redirects=False)
        directory_status_reliable = control_dir.status_code not in (200, 403)
    except requests.RequestException:
        # Control unavailable: keep trusting status codes (best effort).
        pass

    found_plugins = []

    def check_plugin(plugin):
        """Return ``(plugin, version)`` when the plugin appears installed, else None."""
        try:
            readme = session.get(f"{base}/wp-content/plugins/{plugin}/readme.txt",
                                 timeout=5, allow_redirects=False)
            if readme.status_code == 200:
                ver_match = re.search(r'Stable tag:\s*([\d.]+)', readme.text, re.IGNORECASE)
                if ver_match:
                    return (plugin, ver_match.group(1))
                # Without a version marker a 200 only counts when 200 is meaningful.
                if readme_status_reliable:
                    return (plugin, "unknown")
            if directory_status_reliable:
                directory = session.get(f"{base}/wp-content/plugins/{plugin}/",
                                        timeout=5, allow_redirects=False)
                if directory.status_code in (200, 403):
                    return (plugin, "detected")
        except requests.RequestException:
            pass
        return None

    with ThreadPoolExecutor(max_workers=15) as pool:
        futures = {pool.submit(check_plugin, p): p for p in popular_plugins}
        for future in as_completed(futures):
            r = future.result()
            if r:
                found_plugins.append(r)

    if found_plugins:
        found_plugins.sort()
        for plugin, version in found_plugins:
            sev = Severity.MEDIUM if plugin in (
                "wp-file-manager", "file-manager", "loginizer"
            ) else Severity.LOW
            # "unknown"/"detected" are placeholders, not version numbers; do not
            # render them as "vunknown"/"vdetected".
            if version in ("unknown", "detected"):
                title = f"Plugin: {plugin} (version unknown)"
                detail = f"WordPress plugin {plugin} detected, version unknown."
            else:
                title = f"Plugin: {plugin} (v{version})"
                detail = f"WordPress plugin {plugin} version {version}."
            result.add(Finding("WP Plugins", title, sev,
                detail,
                f"Ensure {plugin} is updated to the latest version.",
                confidence=Confidence.FIRM))

    try:
        resp = session.get(base, timeout=10)
        theme_match = re.search(r'/wp-content/themes/([\w\-]+)/', resp.text)
        if theme_match:
            theme = theme_match.group(1)
            result.add(Finding("WP Themes", f"Theme: {theme}", Severity.INFO,
                f"WordPress theme: {theme}.",
                confidence=Confidence.FIRM))
            # Check style.css for version
            try:
                style_resp = session.get(
                    f"{base}/wp-content/themes/{theme}/style.css", timeout=5)
                if style_resp.status_code == 200:
                    ver = re.search(r'Version:\s*([\d.]+)', style_resp.text)
                    if ver:
                        result.add(Finding("WP Themes", f"Theme {theme} version: {ver.group(1)}",
                            Severity.INFO, f"Theme {theme} version {ver.group(1)}.",
                            confidence=Confidence.FIRM))
            except requests.RequestException:
                pass
    except requests.RequestException:
        pass




def check_xxe(session: requests.Session, url: str, result: ScanResult):
    """Probe XML-consuming endpoints for XML External Entity (XXE) injection.

    An XML document whose external entity points at ``/etc/passwd`` is POSTed to
    a list of likely XML endpoints. A non-HTML 200 response containing passwd
    content is reported; a second, entity-free request is used as a baseline
    for the weaker keyword indicators. The scan stops at the first finding.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the site.
        result: Scan result that receives the finding.
    """
    base = url.rstrip("/")
    xml_paths = ["/api/", "/api/v1/", "/upload", "/import", "/xmlrpc.php",
                 "/soap", "/wsdl", "/xml", "/rss", "/feed", "/sitemap.xml"]

    # The entity must target the file whose content the indicators below look
    # for; every indicator is a fragment of /etc/passwd.
    xxe_payload = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE foo [
  <!ENTITY xxe SYSTEM "file:///etc/passwd">
]>
<root><data>&xxe;</data></root>'''

    # Baseline: send same POST without XXE payload to compare
    clean_xml = '<?xml version="1.0"?><root><data>test</data></root>'
    xml_headers = {"Content-Type": "application/xml"}

    for path in xml_paths:
        try:
            resp = session.post(f"{base}{path}", timeout=8,
                data=xxe_payload,
                headers=xml_headers)

            if resp.status_code != 200:
                continue
            if _is_spa_response(resp, result):
                continue
            ct = resp.headers.get("Content-Type", "").lower()
            if "text/html" in ct:
                continue
            body = resp.text.lower()

            # Strong indicator: the root entry of /etc/passwd
            if "root:x:0:" in body:
                result.add(Finding("XXE", f"XXE vulnerability: {path}", Severity.CRITICAL,
                    f"XML External Entity injection at {path} allows reading system files.",
                    "Disable external entities in the XML parser.",
                    confidence=Confidence.TENTATIVE))
                return

            # Compare with clean XML to detect entity resolution
            try:
                clean_resp = session.post(f"{base}{path}", timeout=8,
                    data=clean_xml,
                    headers=xml_headers)
                if (clean_resp.status_code == 200
                        and abs(len(resp.text) - len(clean_resp.text)) > 10):
                    if any(kw in body for kw in ["/bin/bash", "/bin/sh", "daemon:x:", "nobody:x:"]):
                        result.add(Finding("XXE", f"XXE vulnerability: {path}", Severity.CRITICAL,
                            f"XML External Entity injection at {path} allows reading system files.",
                            "Disable external entities in the XML parser.",
                            confidence=Confidence.TENTATIVE))
                        return
            except requests.RequestException:
                pass
        except requests.RequestException:
            pass




def check_prototype_pollution(session: requests.Session, url: str, result: ScanResult):
    """Probe the site root for prototype pollution through query parameters.

    Each payload tries to set a ``qscan`` property to ``polluted`` on the
    object prototype. When both markers show up in the response, a control
    request carrying ``polluted`` in an ordinary parameter is sent; the finding
    is only recorded when the control response does not contain the marker.
    The scan stops at the first finding.
    """
    root = url.rstrip("/")
    pollution_queries = (
        "__proto__[qscan]=polluted",
        "constructor[prototype][qscan]=polluted",
        "__proto__.qscan=polluted",
    )

    for payload in pollution_queries:
        try:
            polluted_page = session.get(f"{root}/?{payload}", timeout=8).text
            if "polluted" not in polluted_page or "qscan" not in polluted_page:
                continue
            # Control request: rules out plain reflection of the parameter value.
            control_page = session.get(f"{root}/?normal_param=polluted", timeout=8).text
        except requests.RequestException:
            continue

        if "polluted" in control_page:
            continue

        result.add(Finding(
            "Prototype Pollution",
            "Prototype Pollution via URL",
            Severity.HIGH,
            f"Payload {payload} affects server response.",
            "Sanitize object keys. Use Object.create(null).",
            confidence=Confidence.TENTATIVE,
        ))
        return




def check_email_security(session: requests.Session, url: str, result: ScanResult):
    """Check MTA-STS and BIMI configuration for the target's domain.

    The domain is the URL's hostname with a leading ``www.`` removed. The
    MTA-STS policy file is fetched over HTTPS from ``mta-sts.<domain>``; BIMI is
    looked up as a TXT record at ``default._bimi.<domain>`` when dnspython is
    available.

    Args:
        session: HTTP session used for the policy request.
        url: Target URL; nothing is checked when it carries no hostname.
        result: Scan result that receives the findings.
    """
    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname
    # A URL without scheme/host (e.g. a bare "example.com") parses to hostname None.
    if not domain:
        return
    if domain.startswith("www."):
        domain = domain[4:]

    # MTA-STS
    try:
        resp = session.get(f"https://mta-sts.{domain}/.well-known/mta-sts.txt", timeout=8)
        if resp.status_code == 200 and "version:" in resp.text.lower():
            result.add(Finding("Email Security", "MTA-STS configured", Severity.INFO,
                "MTA-STS policy found.",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("Email Security", "No MTA-STS", Severity.LOW,
                "MTA-STS not configured — no protection against SMTP downgrade attacks.",
                f"Configure MTA-STS: create mta-sts.{domain} with .well-known/mta-sts.txt",
                confidence=Confidence.CONFIRMED))
    except requests.RequestException:
        # The policy host is unreachable; reported the same as a missing policy.
        result.add(Finding("Email Security", "No MTA-STS", Severity.LOW,
            "MTA-STS not configured.",
            "Configure MTA-STS to protect email connections.",
            confidence=Confidence.CONFIRMED))

    # BIMI
    if HAS_DNS:
        try:
            resolver = dns.resolver.Resolver()
            resolver.timeout = 5
            answers = resolver.resolve(f"default._bimi.{domain}", "TXT")
            for rdata in answers:
                txt = rdata.to_text().strip('"')
                if "v=bimi1" in txt.lower():
                    result.add(Finding("Email Security", "BIMI configured", Severity.INFO,
                        f"BIMI: {txt[:100]}",
                        confidence=Confidence.CONFIRMED))
        except Exception:
            # Best effort: a missing record or any resolver failure means "no BIMI info".
            pass




def check_breach(session: requests.Session, url: str, result: ScanResult):
    """Check whether the preconditions of the BREACH attack are met.

    All of the following must hold for a finding: the URL is HTTPS, the
    response is compressed, a query parameter value is reflected in the body,
    and the body mentions a CSRF-style secret.

    Args:
        session: HTTP session used for both requests.
        url: Target URL; an existing query string is preserved.
        result: Scan result that receives the finding.
    """
    parsed = urllib.parse.urlparse(url)
    # BREACH only applies to TLS-protected responses.
    if parsed.scheme != "https":
        return

    # Append the probe parameter properly instead of gluing "?q=" onto the URL,
    # which produced an invalid URL when a query or fragment was already present.
    probe_param = "q=qscan_breach_test"
    probe_query = f"{parsed.query}&{probe_param}" if parsed.query else probe_param
    probe_url = urllib.parse.urlunparse(parsed._replace(query=probe_query, fragment=""))

    try:
        resp = session.get(url, headers={"Accept-Encoding": "gzip, deflate"}, timeout=10)
        # Content-Encoding is case-insensitive and may list several codings.
        codings = {
            coding.strip()
            for coding in resp.headers.get("Content-Encoding", "").lower().split(",")
        }
        if not codings & {"gzip", "deflate", "br"}:
            return

        # Check input reflection
        resp2 = session.get(probe_url, timeout=8)
        reflected_body = resp2.text
        if "qscan_breach_test" not in reflected_body:
            return

        # Check for secret presence (CSRF token, etc.)
        lowered = reflected_body.lower()
        if any(kw in lowered for kw in ("csrf", "_token", "authenticity_token", "xsrf")):
            result.add(Finding("BREACH", "BREACH attack possible", Severity.MEDIUM,
                "HTTPS + compression + input reflection + secret in body = BREACH conditions.",
                "Disable compression for pages with secrets or use randomization.",
                confidence=Confidence.FIRM))
    except requests.RequestException:
        pass




def check_deserialization(session: requests.Session, url: str, result: ScanResult):
    """Look for indicators of insecure deserialization in cookies and HTML.

    Cookies set by the page are inspected for PHP ``serialize()`` output, for
    base64 prefixes associated with Java serialization, and for a
    ``__VIEWSTATE`` cookie. The HTML body is searched for an ASP.NET ViewState
    field, which is reported as informational.

    Args:
        session: HTTP session used for the request.
        url: Page to fetch.
        result: Scan result that receives the findings.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    # Serialized PHP values start with a type tag: a:<n>:, O:<n>:, s:<n>: for
    # arrays/objects/strings, i:<n>; and b:<n>; for integers/booleans.
    php_serialized = re.compile(r'(?:[aOs]:\d+:|[ib]:\d+;)')

    # Check cookies for serialized data
    for cookie in resp.cookies:
        # Cookie values are frequently percent-encoded ("a%3A2%3A%7B...").
        val = urllib.parse.unquote(cookie.value or "")
        # PHP serialized. Only the start of the value is checked: a bare
        # substring test for "a:" matches unrelated values such as "data:...".
        if php_serialized.match(val):
            result.add(Finding("Deserialization",
                f"PHP serialization in cookie '{cookie.name}'", Severity.HIGH,
                f"Cookie {cookie.name} contains PHP-serialized data.",
                "Use JSON instead of serialize(). Sign and validate cookies.",
                confidence=Confidence.FIRM))
        # Java serialized: "rO0AB" is base64 of the aced0005 stream header,
        # "H4sIA" is base64 of a gzip header (used for compressed streams).
        elif val.startswith(("rO0AB", "H4sIA")):
            result.add(Finding("Deserialization",
                f"Java serialization in cookie '{cookie.name}'", Severity.HIGH,
                f"Cookie {cookie.name} contains Java-serialized data.",
                "Do not deserialize user input. Use a class whitelist.",
                confidence=Confidence.FIRM))
        # .NET ViewState
        elif cookie.name == "__VIEWSTATE":
            if "mac" not in str(resp.headers).lower():
                result.add(Finding("Deserialization",
                    "ViewState without MAC", Severity.HIGH,
                    "__VIEWSTATE may be unsigned — RCE possible.",
                    "Enable enableViewStateMac in web.config.",
                    confidence=Confidence.FIRM))

    # ViewState in HTML: presence only, the value itself is not analysed.
    vs_match = re.search(r'__VIEWSTATE[^>]*value="([^"]+)"', resp.text)
    if vs_match:
        vs_val = vs_match.group(1)
        result.add(Finding("Deserialization", "ViewState detected", Severity.INFO,
            f"ASP.NET ViewState found (length: {len(vs_val)}).",
            confidence=Confidence.CONFIRMED))




def check_ssrf_params(session: requests.Session, url: str, result: ScanResult):
    """Probe common URL-carrying parameters for server-side request forgery.

    Each candidate parameter is set to the AWS instance metadata address. When
    a 200, 301 or 302 response contains typical metadata keys, the parameter is
    reported and the scan stops.
    """
    root = url.rstrip("/")
    metadata_markers = (
        "ami-id", "instance-id", "security-credentials",
        "instance-type", "local-hostname",
    )
    # AWS metadata service endpoint, percent-encoded once for all requests.
    encoded_canary = urllib.parse.quote("http://169.254.169.254/latest/meta-data/")
    candidate_params = (
        "url", "uri", "src", "source", "href", "link", "redirect",
        "feed", "host", "site", "domain", "proxy", "page",
        "callback", "return", "data", "load", "fetch", "request",
        "img", "image", "file", "path", "target", "ref",
    )

    for param in candidate_params:
        try:
            resp = session.get(f"{root}/?{param}={encoded_canary}",
                               timeout=8, allow_redirects=False)
        except requests.RequestException:
            continue

        if resp.status_code not in (200, 301, 302):
            continue

        lowered = resp.text.lower()
        if not any(marker in lowered for marker in metadata_markers):
            continue

        result.add(Finding(
            "SSRF",
            f"SSRF via ?{param}=",
            Severity.CRITICAL,
            f"Parameter {param} allows access to internal resources (AWS metadata).",
            f"Do not use parameter {param} for server-side HTTP requests. "
            "Configure a domain whitelist.",
            confidence=Confidence.TENTATIVE,
        ))
        return




def check_command_injection(session: requests.Session, url: str, result: ScanResult):
    """Probe common parameters for OS command injection.

    Each payload asks the shell to ``echo`` a marker. Executed output contains
    the bare marker, whereas a reflected payload contains the marker preceded
    by the literal ``echo``; only the former is reported. A control request
    with a benign value confirms that the marker is not part of the page
    anyway. The scan stops at the first finding.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the site.
        result: Scan result that receives the finding.
    """
    base = url.rstrip("/")
    params = ["cmd", "exec", "command", "ping", "query", "host",
              "ip", "domain", "port", "dir", "path", "file",
              "filename", "name", "search", "input"]

    # Safe payloads — non-destructive, detection only
    payloads = [
        (";echo qscan_cmd_test", "qscan_cmd_test", "echo"),
        ("|echo qscan_cmd_test", "qscan_cmd_test", "pipe echo"),
        ("$(echo qscan_cmd_test)", "qscan_cmd_test", "subshell"),
        ("`echo qscan_cmd_test`", "qscan_cmd_test", "backtick"),
        ("& echo qscan_cmd_test &", "qscan_cmd_test", "ampersand"),
    ]
    control_value = urllib.parse.quote("qscan_no_cmd")

    for param in params:
        for payload, expected, name in payloads:
            try:
                resp = session.get(
                    f"{base}/?{param}={urllib.parse.quote(payload)}", timeout=8)
                if _is_spa_response(resp, result):
                    continue
                if expected not in resp.text:
                    continue

                # Discard reflections of the payload (raw, HTML-escaped or
                # URL-encoded): in all of them the marker directly follows
                # "echo". The marker cannot be compared against its own
                # URL-encoded form, which is identical to the marker itself.
                remainder = resp.text
                for echoed in (f"echo {expected}", f"echo%20{expected}", f"echo+{expected}"):
                    remainder = remainder.replace(echoed, "")
                if expected not in remainder:
                    continue  # reflection only, not command execution

                # Control: the marker must not appear for a benign value.
                resp2 = session.get(f"{base}/?{param}={control_value}", timeout=8)
                if expected not in resp2.text:
                    result.add(Finding("Command Injection",
                        f"OS command injection: ?{param}= ({name})", Severity.CRITICAL,
                        f"Parameter {param} allows OS command execution.",
                        "Never pass user input to shell commands. "
                        "Use safe APIs.",
                        confidence=Confidence.TENTATIVE))
                    return
            except requests.RequestException:
                pass




def check_nosql_injection(session: requests.Session, url: str, result: ScanResult):
    """Probe login endpoints for NoSQL (MongoDB-style) operator injection.

    For every candidate login path a baseline POST with wrong credentials is
    sent first. Each operator payload is then replayed; a finding is recorded
    when the baseline was rejected (401/403) but the payload request is
    answered with 200 or 302. The scan stops at the first finding.
    """
    root = url.rstrip("/")

    # MongoDB operator injection
    operator_payloads = (
        ("username[$ne]=&password[$ne]=", "MongoDB $ne operator"),
        ("username[$gt]=&password[$gt]=", "MongoDB $gt operator"),
        ("username[$regex]=.*&password[$regex]=.*", "MongoDB $regex"),
        ('{"$gt":""}', "MongoDB JSON $gt"),
    )
    candidate_paths = ("/login", "/api/login", "/auth/login", "/api/auth",
                       "/api/users/login", "/api/v1/auth")

    for path in candidate_paths:
        endpoint = f"{root}{path}"
        try:
            baseline = session.post(endpoint, timeout=8,
                                    data={"username": "admin", "password": "wrong123"},
                                    allow_redirects=False)
        except requests.RequestException:
            continue

        if baseline.status_code not in (200, 401, 403, 302):
            continue
        if _is_spa_response(baseline, result):
            continue

        baseline_rejected = baseline.status_code in (401, 403)

        for payload, name in operator_payloads:
            try:
                if payload.startswith("{"):
                    # JSON variant: operators are sent as a JSON body.
                    attempt = session.post(endpoint, timeout=8,
                                           json={"username": {"$gt": ""}, "password": {"$gt": ""}},
                                           allow_redirects=False)
                else:
                    attempt = session.post(f"{endpoint}?{payload}", timeout=8,
                                           allow_redirects=False)
            except requests.RequestException:
                continue

            # Rejected baseline turning into success/redirect suggests a bypass.
            if not (baseline_rejected and attempt.status_code in (200, 302)):
                continue

            result.add(Finding(
                "NoSQL Injection",
                f"NoSQL injection at {path} ({name})",
                Severity.CRITICAL,
                f"Operator {name} alters authentication behavior.",
                "Validate input types. Reject MongoDB operators in user input.",
                confidence=Confidence.TENTATIVE,
            ))
            return




def check_lfi_rfi(session: requests.Session, url: str, result: ScanResult):
    """Probe common include-style parameters for local file inclusion.

    For each parameter a baseline request with the value ``index`` is sent;
    parameters answered with anything other than 200 or 500 are skipped. Each
    payload is then sent percent-encoded, and a finding is recorded when the
    payload's indicator appears in the response but not in the baseline. The
    scan stops at the first finding.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the site.
        result: Scan result that receives the finding.
    """
    base = url.rstrip("/")
    include_params = ["page", "file", "include", "template", "doc",
                      "document", "view", "content", "module", "plugin",
                      "load", "read", "lang", "language", "layout",
                      "theme", "skin", "action", "display"]

    # Payloads are written in the form the server should see after ONE round of
    # URL decoding, because every payload goes through urllib.parse.quote()
    # below. Storing already-encoded text here would add an extra encoding
    # layer ("%00" would be sent as "%2500", "%252f" as "%25252f").
    lfi_payloads = [
        ("../../../../etc/passwd", "root:", "LFI /etc/passwd"),
        ("..\\..\\..\\..\\windows\\win.ini", "[fonts]", "LFI win.ini"),
        ("....//....//....//etc/passwd", "root:", "LFI double-dot bypass"),
        # quote() turns "%2f" into "%252f": the double-encoded slash.
        ("..%2f..%2f..%2fetc/passwd", "root:", "LFI double-encode"),
        # quote() turns the NUL character into "%00".
        ("/etc/passwd\x00", "root:", "LFI null byte"),
        # PD9waHA = base64("<?php"), more specific than just "PD"
        ("php://filter/convert.base64-encode/resource=index", "PD9waHA", "PHP filter wrapper"),
    ]

    for param in include_params:
        # First check if parameter is accepted at all
        try:
            resp_base = session.get(f"{base}/?{param}=index", timeout=8)
            if resp_base.status_code not in (200, 500):
                continue
        except requests.RequestException:
            continue

        for payload, indicator, name in lfi_payloads:
            try:
                resp = session.get(
                    f"{base}/?{param}={urllib.parse.quote(payload)}", timeout=8)
                if _is_spa_response(resp, result):
                    continue
                if indicator not in resp.text:
                    continue
                # Verify indicator wasn't already in the baseline response
                if indicator in resp_base.text:
                    continue
                result.add(Finding("LFI/RFI",
                    f"Local File Inclusion: ?{param}= ({name})", Severity.CRITICAL,
                    f"Parameter {param} allows reading local files.",
                    "Use a file whitelist. Do not pass user input to include/require.",
                    confidence=Confidence.TENTATIVE))
                return
            except requests.RequestException:
                pass




def check_request_smuggling(session: requests.Session, url: str, result: ScanResult):
    """Look for indicators of HTTP request smuggling (CL.TE / TE.CL).

    Two POSTs carrying both ``Transfer-Encoding`` and ``Content-Length`` are
    compared by status code, then a non-standard ``Transfer-Encoding`` value is
    sent to see whether the server still answers 200. Both results are
    heuristic and reported as tentative.

    Args:
        session: HTTP session used for all requests.
        url: Target URL that receives the POST requests.
        result: Scan result that receives the findings.
    """
    chunked_terminator = "0\r\n\r\n"

    try:
        # NOTE(review): requests may recompute Content-Length from the body, in
        # which case these two requests are identical on the wire — confirm;
        # a faithful CL/TE probe needs a raw socket.
        resp = session.post(url, timeout=8,
            headers={
                "Transfer-Encoding": "chunked",
                "Content-Length": "0",
            },
            data=chunked_terminator)

        resp2 = session.post(url, timeout=8,
            headers={
                "Content-Length": "5",
                "Transfer-Encoding": "chunked",
            },
            data=chunked_terminator)

        # Behavioral differences may indicate smuggling
        if resp.status_code != resp2.status_code:
            result.add(Finding("Request Smuggling",
                "Different behavior CL vs TE", Severity.MEDIUM,
                f"Server responds differently to CL={resp.status_code} vs TE={resp2.status_code}. "
                "HTTP Request Smuggling vulnerability possible.",
                "Ensure frontend and backend handle CL/TE consistently.",
                confidence=Confidence.TENTATIVE))

        # Obfuscated Transfer-Encoding. Only the header VALUE can be varied via
        # a headers dict. The name-level variants tried previously (space before
        # the colon, tab separator) degraded to a plain "chunked" value, which
        # flagged every server that simply supports chunked bodies, and the
        # duplicated-header variant put CR/LF into the value.
        obfuscated_te_values = ["xchunked"]
        for te_value in obfuscated_te_values:
            try:
                r = session.post(url, timeout=5,
                    headers={"Transfer-Encoding": te_value},
                    data=chunked_terminator)
                if r.status_code == 200:
                    result.add(Finding("Request Smuggling",
                        "Server accepts obfuscated Transfer-Encoding", Severity.LOW,
                        "Non-standard TE header accepted. May be used for smuggling.",
                        "Normalize Transfer-Encoding handling.",
                        confidence=Confidence.TENTATIVE))
                    break
            except requests.RequestException:
                pass

    except requests.RequestException:
        pass








def check_sni(url: str, result: ScanResult):
    """Check whether the server presents the target's certificate for an unrelated SNI name.

    A TLS connection is opened with the SNI name ``wrong.qscan.test``. When the
    certificate returned still covers the real host, the server is reported as
    ignoring SNI. Servers whose certificate does not validate against the
    system trust store cannot be inspected and produce no finding.

    Args:
        url: Target URL; only ``https`` URLs with a hostname are checked.
        result: Scan result that receives the finding.
    """
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme != "https":
        return

    host = parsed.hostname
    if not host:
        return
    port = parsed.port or 443

    # getpeercert() returns an empty dict unless the certificate was validated,
    # so chain verification stays enabled. Only hostname matching is disabled,
    # because the SNI name sent is deliberately wrong.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False

    try:
        with socket.create_connection((host, port), timeout=5) as sock:
            with ctx.wrap_socket(sock, server_hostname="wrong.qscan.test") as ssock:
                cert = ssock.getpeercert()
    except (OSError, ValueError):
        # Connection, handshake or verification failure: nothing to report.
        return

    host_lower = host.lower()
    sans = [value.lower() for _, value in cert.get("subjectAltName", ())]
    # If server returns our domain's cert for wrong SNI
    covers_host = any(
        san == host_lower or (san.startswith("*.") and host_lower.endswith(san[1:]))
        for san in sans
    )
    if covers_host:
        result.add(Finding("SSL/TLS", "Server ignores SNI", Severity.LOW,
            f"Server returns certificate for {host} regardless of SNI.",
            "Configure virtual hosts with separate certificates.",
            confidence=Confidence.CONFIRMED))




def check_cookie_scope(session: requests.Session, url: str, result: ScanResult):
    """Report cookies whose ``Domain`` attribute is broader than the scanned host.

    A cookie is flagged when its domain is a parent of the host (the host ends
    with ``.<cookie domain>``). When the domain equals the host's last two
    labels the "broad domain" finding is used; any other parent domain gets the
    "scoped to parent domain" finding.

    Args:
        session: HTTP session used for the request.
        url: Target URL; nothing is checked when it carries no hostname.
        result: Scan result that receives the findings.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    if not host:
        return

    host_lower = host.lower()
    labels = host_lower.split(".")
    # Last two labels of a host with at least three labels, e.g. "example.com"
    # for "sub.example.com". This is a heuristic, not a public-suffix lookup.
    two_label_parent = ".".join(labels[-2:]) if len(labels) > 2 else None

    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    # Collect all Set-Cookie headers properly
    set_cookies = []
    if hasattr(resp.raw, 'headers'):
        # urllib3 HTTPHeaderDict — multiple Set-Cookie headers
        if hasattr(resp.raw.headers, 'getlist'):
            set_cookies = resp.raw.headers.getlist("Set-Cookie")
        elif hasattr(resp.raw.headers, 'items'):
            set_cookies = [v for k, v in resp.raw.headers.items()
                          if k.lower() == "set-cookie"]
    if not set_cookies:
        sc = resp.headers.get("Set-Cookie")
        if sc:
            set_cookies = [sc]

    for sc in set_cookies:
        name = sc.split("=")[0].strip()
        # Anchor on the attribute separator so "domain=" inside a cookie value
        # is not mistaken for the Domain attribute; a leading dot is dropped.
        domain_match = re.search(r';\s*domain=\.?([^;\s]+)', sc, re.IGNORECASE)
        if not domain_match:
            continue
        cookie_domain = domain_match.group(1).lower()

        # Parent-domain test on a label boundary: "ple.com" is not a parent of
        # "example.com", while "example.com" is a parent of "sub.example.com".
        if cookie_domain == host_lower or not host_lower.endswith(f".{cookie_domain}"):
            continue

        if cookie_domain == two_label_parent:
            result.add(Finding("Cookie Scope",
                f"Cookie '{name}' with broad domain={cookie_domain}", Severity.LOW,
                f"Cookie accessible on all subdomains of {cookie_domain}.",
                "Restrict cookie domain to the specific subdomain if possible.",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("Cookie Scope",
                f"Cookie '{name}' scoped to parent domain {cookie_domain}", Severity.LOW,
                f"Cookie '{name}' is scoped to {cookie_domain}, broader than {host_lower}.",
                "Restrict cookie domain to the specific host if possible.",
                confidence=Confidence.CONFIRMED))




def check_timing_enum(session: requests.Session, url: str, result: ScanResult):
    """Check login endpoints for timing-based user enumeration.

    The first login path that answers GET with 200 is measured: three login
    attempts with a made-up username and three with ``admin`` are timed, and an
    average difference above 0.5 seconds is reported. Only that first path is
    measured.

    Args:
        session: HTTP session used for all requests.
        url: Base URL of the site.
        result: Scan result that receives the finding.
    """
    base = url.rstrip("/")
    login_paths = ["/login", "/api/login", "/auth/login", "/signin"]

    for path in login_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=8)
            if resp.status_code != 200:
                continue

            times_nonexist = []
            times_admin = []

            for _ in range(3):
                # perf_counter is monotonic, unlike the wall clock.
                start = time.perf_counter()
                try:
                    session.post(f"{base}{path}", timeout=8,
                        data={"username": f"qscan_fake_{int(time.time())}", "password": "wrong"})
                    times_nonexist.append(time.perf_counter() - start)
                except requests.RequestException:
                    pass

                start = time.perf_counter()
                try:
                    session.post(f"{base}{path}", timeout=8,
                        data={"username": "admin", "password": "wrong"})
                    times_admin.append(time.perf_counter() - start)
                except requests.RequestException:
                    pass

            if times_nonexist and times_admin:
                avg_nonexist = sum(times_nonexist) / len(times_nonexist)
                avg_admin = sum(times_admin) / len(times_admin)
                diff = abs(avg_admin - avg_nonexist)

                if diff > 0.5:
                    # "admin" stands in for an existing account.
                    slower = "existing user" if avg_admin > avg_nonexist else "non-existing user"
                    result.add(Finding("Timing Enumeration",
                        f"Response time difference on {path}: {diff:.2f}s", Severity.MEDIUM,
                        f"Response for {slower} is noticeably slower ({avg_admin:.2f}s vs {avg_nonexist:.2f}s).",
                        "Use consistent processing time for all login inputs.",
                        confidence=Confidence.TENTATIVE))
            break
        except requests.RequestException:
            pass




def check_openapi(session: requests.Session, url: str, result: ScanResult):
    """Swagger/OpenAPI spec analysis.

    Requests a list of well-known spec locations and analyses the first one
    that returns a JSON object carrying a spec marker key (``openapi``,
    ``swagger``, ``swaggerVersion`` or ``paths``). Up to three findings are
    added to ``result``: the spec being public, operations without a security
    requirement, and operations whose path contains a sensitive keyword.

    Args:
        session: HTTP session used for every request.
        url: Base URL of the target; a trailing slash is ignored.
        result: Collector that receives the findings.

    YAML specs are requested but cannot be parsed here, so they are skipped.
    Request errors are ignored (best effort).
    """
    base = url.rstrip("/")
    spec_paths = [
        "/swagger.json", "/swagger/v1/swagger.json", "/swagger.yaml",
        "/openapi.json", "/openapi.yaml", "/api-docs",
        "/api-docs/swagger.json", "/v1/swagger.json", "/v2/swagger.json",
        "/api/swagger.json", "/api/openapi.json",
        "/swagger-ui/swagger.json", "/docs/swagger.json",
    ]
    spec_markers = ("openapi", "swagger", "swaggerVersion", "paths")
    http_methods = {"get", "post", "put", "delete", "patch"}
    sensitive_keywords = (
        "admin", "user", "password", "token", "secret",
        "config", "setting", "delete", "upload", "file",
    )

    for path in spec_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=8)
        except requests.RequestException:
            continue
        if resp.status_code != 200:
            continue

        try:
            spec = resp.json()
        except ValueError:
            continue

        # Arbitrary JSON served with 200 (error envelopes, lists, ...) is not a spec.
        if not isinstance(spec, dict) or not any(key in spec for key in spec_markers):
            continue

        # Malformed documents may carry null or non-object values for these keys.
        info = spec.get("info")
        if not isinstance(info, dict):
            info = {}
        paths = spec.get("paths")
        if not isinstance(paths, dict):
            paths = {}

        title = info.get("title", "API")
        version = info.get("version", "?")

        result.add(Finding("OpenAPI", f"Spec accessible: {path}", Severity.MEDIUM,
            f"OpenAPI/Swagger for '{title}' v{version} publicly accessible ({len(paths)} endpoints).",
            "Restrict API spec access in production.",
            confidence=Confidence.FIRM))

        unprotected = []
        sensitive_endpoints = []
        for ep_path, methods in paths.items():
            if not isinstance(methods, dict):
                continue
            ep_lower = ep_path.lower()
            is_sensitive = any(kw in ep_lower for kw in sensitive_keywords)
            for method, details in methods.items():
                # Path items also hold non-operation keys such as "parameters".
                if method.lower() not in http_methods or not isinstance(details, dict):
                    continue
                label = f"{method.upper()} {ep_path}"
                # An operation-level "security" overrides the document-level default.
                if not details.get("security", spec.get("security")):
                    unprotected.append(label)
                if is_sensitive:
                    sensitive_endpoints.append(label)

        if unprotected:
            result.add(Finding("OpenAPI", f"Unauthenticated endpoints ({len(unprotected)})",
                Severity.MEDIUM,
                f"Examples: {', '.join(unprotected[:5])}.",
                "Add security requirements to all endpoints.",
                confidence=Confidence.FIRM))

        if sensitive_endpoints:
            result.add(Finding("OpenAPI", f"Sensitive endpoints ({len(sensitive_endpoints)})",
                Severity.LOW,
                f"Sensitive APIs: {', '.join(sensitive_endpoints[:5])}.",
                "Ensure proper authorization for sensitive endpoints.",
                confidence=Confidence.FIRM))

        # Stop after the first spec that was analysed.
        return




def check_wsdl(session: requests.Session, url: str, result: ScanResult):
    """WSDL detection (SOAP API).

    Requests common ``?wsdl`` locations and reports the first response that
    looks like a WSDL document, listing up to ten distinct operation names.

    Args:
        session: HTTP session used for every request.
        url: Base URL of the target; a trailing slash is ignored.
        result: Collector that receives the finding.

    Request errors are ignored (best effort).
    """
    base = url.rstrip("/")
    wsdl_paths = [
        "/?wsdl", "/service?wsdl", "/api?wsdl",
        "/ws?wsdl", "/services?wsdl", "/soap?wsdl",
        "/Service.asmx?wsdl", "/WebService.asmx?wsdl",
    ]

    for path in wsdl_paths:
        try:
            resp = session.get(f"{base}{path}", timeout=8)
            if resp.status_code != 200:
                continue
            if _is_spa_response(resp, result):
                continue
            body_lower = resp.text.lower()
            # Either a wsdl:-prefixed element or the unprefixed definitions/service pair.
            if "<wsdl:" not in body_lower and not ("definitions" in body_lower and "service" in body_lower):
                continue
            operations = re.findall(r'<wsdl:operation name="([^"]+)"', resp.text, re.IGNORECASE)
            if not operations:
                # Fall back to documents that do not use the wsdl: prefix.
                operations = re.findall(r'operation name="([^"]+)"', resp.text, re.IGNORECASE)
            # The same operation is declared under both portType and binding;
            # drop repeats (keeping first-seen order) so they do not eat the 10-item limit.
            operations = list(dict.fromkeys(operations))
            result.add(Finding("WSDL", f"WSDL accessible: {path}", Severity.MEDIUM,
                f"SOAP WSDL exposes API: {', '.join(operations[:10]) if operations else 'operations detected'}.",
                "Restrict WSDL access in production.",
                confidence=Confidence.FIRM))
            return
        except requests.RequestException:
            pass




def check_banner_grab(url: str, result: ScanResult):
    """Banner grabbing on open ports.

    Connects to a fixed set of service ports on the URL's host, optionally
    sends a probe, and records whatever the service sends back. A banner that
    contains an ``x.y.z`` version string is rated MEDIUM, otherwise LOW.

    Args:
        url: Target URL; only its hostname is used.
        result: Collector that receives one finding per banner.

    Closed, filtered or silent ports are skipped silently.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    if not host:
        # create_connection((None, port)) would resolve to the loopback
        # interface and probe the scanning machine instead of the target.
        return

    ports_to_grab = {
        21: b"",  # FTP sends banner on connect
        22: b"",  # SSH sends banner on connect
        25: b"EHLO qscan\r\n",
        110: b"",
        143: b"",
        3306: b"",  # MySQL sends banner
    }

    for port, probe in ports_to_grab.items():
        try:
            # The 3s timeout also applies to the recv() below.
            with socket.create_connection((host, port), timeout=3) as sock:
                if probe:
                    sock.sendall(probe)
                banner = sock.recv(1024).decode("utf-8", errors="replace").strip()
        except OSError:
            # Covers timeouts and refused connections (both OSError subclasses).
            continue

        if not banner or len(banner) <= 3:
            continue
        version_info = re.findall(r'[\d]+\.[\d]+\.[\d]+', banner)
        sev = Severity.MEDIUM if version_info else Severity.LOW
        result.add(Finding("Banner Grabbing",
            f"Port {port}: {banner[:80]}", sev,
            f"Service banner on port {port} reveals info: {banner[:120]}.",
            f"Mask or remove service banner on port {port}.",
            confidence=Confidence.CONFIRMED))




def check_reporting_headers(session: requests.Session, url: str, result: ScanResult):
    """Inspect the reporting-related response headers of ``url``.

    Looks at Expect-CT, Report-To, NEL and the reporting directives of the
    Content-Security-Policy header, adding one finding per observation to
    ``result``. Nothing is reported when the request fails.
    """
    try:
        response = session.get(url, timeout=10)
    except requests.RequestException:
        return

    # Lower-cased copy of the headers for uniform lookups.
    hdrs = {}
    for key, value in response.headers.items():
        hdrs[key.lower()] = value

    # Expect-CT is only meaningful on HTTPS targets.
    if "expect-ct" not in hdrs and urllib.parse.urlparse(url).scheme == "https":
        result.add(Finding(
            "Reporting", "No Expect-CT", Severity.INFO,
            "Expect-CT not set (header deprecated since 2021, Chrome ignores it).",
            "",
            confidence=Confidence.CONFIRMED,
        ))

    # Report-To: informational either way.
    report_to = hdrs.get("report-to")
    if report_to is None:
        result.add(Finding(
            "Reporting", "No Report-To", Severity.INFO,
            "Report-To header not set — no issue report aggregation.",
            confidence=Confidence.CONFIRMED,
        ))
    else:
        result.add(Finding(
            "Reporting", "Report-To configured", Severity.INFO,
            f"Report-To: {report_to[:100]}",
            confidence=Confidence.CONFIRMED,
        ))

    # Network Error Logging is only mentioned when present.
    nel = hdrs.get("nel")
    if nel is not None:
        result.add(Finding(
            "Reporting", "NEL configured", Severity.INFO,
            f"Network Error Logging: {nel[:100]}",
            confidence=Confidence.CONFIRMED,
        ))

    # A CSP with neither reporting directive loses its violation reports.
    policy = hdrs.get("content-security-policy", "")
    if policy and not ("report-uri" in policy or "report-to" in policy):
        result.add(Finding(
            "Reporting", "CSP without report-uri/report-to", Severity.LOW,
            "CSP has no report-uri/report-to — violations not logged.",
            "Add report-uri or report-to to CSP for violation monitoring.",
            confidence=Confidence.CONFIRMED,
        ))




def check_dom_xss(session: requests.Session, url: str, result: ScanResult):
    """DOM-based XSS indicators in JavaScript.

    Fetches ``url`` and searches the response body for well-known DOM XSS
    sources and sinks. This is a textual heuristic with no data-flow analysis,
    so every finding is TENTATIVE:

    * MEDIUM when an attacker-controllable source and a dangerous sink both
      appear in the body.
    * LOW when a source is present and more than three distinct sinks appear.

    Args:
        session: HTTP session used for the request.
        url: Page to analyse.
        result: Collector that receives the finding.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    body = resp.text

    # DOM XSS sources
    sources = [
        (r'document\.location', "document.location"),
        (r'document\.URL', "document.URL"),
        (r'document\.documentURI', "document.documentURI"),
        (r'document\.referrer', "document.referrer"),
        (r'window\.location', "window.location"),
        (r'location\.hash', "location.hash"),
        (r'location\.search', "location.search"),
        (r'location\.href', "location.href"),
        (r'window\.name', "window.name"),
        (r'document\.cookie', "document.cookie (read)"),
        (r'localStorage\.\w+', "localStorage"),
        (r'sessionStorage\.\w+', "sessionStorage"),
        (r'URLSearchParams', "URLSearchParams"),
        (r'history\.pushState', "history.pushState"),
        (r'postMessage', "postMessage"),
    ]

    # DOM XSS sinks
    sinks = [
        (r'\.innerHTML\s*=', "innerHTML"),
        (r'\.outerHTML\s*=', "outerHTML"),
        (r'document\.write\s*\(', "document.write()"),
        (r'document\.writeln\s*\(', "document.writeln()"),
        (r'eval\s*\(', "eval()"),
        (r'setTimeout\s*\(\s*["\']', "setTimeout(string)"),
        (r'setInterval\s*\(\s*["\']', "setInterval(string)"),
        # Case-sensitive on purpose: with IGNORECASE this would match every
        # ordinary "function(" declaration and inflate the sink count.
        (r'(?-i:\bFunction)\s*\(', "Function()"),
        (r'\.insertAdjacentHTML\s*\(', "insertAdjacentHTML"),
        (r'\.src\s*=', ".src assignment"),
        (r'\.href\s*=', ".href assignment"),
        (r'jquery.*\.html\s*\(', "jQuery.html()"),
        (r'\$\s*\(\s*["\'][^"\']*[<>]', "jQuery selector with HTML"),
    ]

    found_sources = {name for pattern, name in sources
                     if re.search(pattern, body, re.IGNORECASE)}
    found_sinks = {name for pattern, name in sinks
                   if re.search(pattern, body, re.IGNORECASE)}

    if not (found_sources and found_sinks):
        return

    # Sorted so the report text is stable between runs (set order is not).
    risky_sources = sorted(found_sources & {"location.hash", "location.search",
        "document.referrer", "window.name", "postMessage"})
    risky_sinks = sorted(found_sinks & {"innerHTML", "document.write()", "eval()",
        "outerHTML", "jQuery.html()"})

    if risky_sources and risky_sinks:
        result.add(Finding("DOM XSS",
            f"Potential DOM XSS: {len(risky_sources)} sources + {len(risky_sinks)} sinks",
            Severity.MEDIUM,
            f"Sources: {', '.join(risky_sources)}. Sinks: {', '.join(risky_sinks)}.",
            "Check if data flows from source to sink without sanitization.",
            confidence=Confidence.TENTATIVE))
    elif len(found_sinks) > 3:
        result.add(Finding("DOM XSS",
            f"Multiple DOM sinks ({len(found_sinks)})", Severity.LOW,
            f"Sinks: {', '.join(sorted(found_sinks)[:5])}.",
            "Minimize use of innerHTML, document.write, eval.",
            confidence=Confidence.TENTATIVE))




def check_mass_assignment(session: requests.Session, url: str, result: ScanResult):
    """Look for privilege-related fields exposed by user/profile APIs.

    Each candidate endpoint is fetched; when one returns a JSON object whose
    keys include a privilege-style field name, a TENTATIVE finding is added
    to ``result`` and the scan stops. Request errors are ignored.
    """
    base = url.rstrip("/")
    api_paths = (
        "/api/user", "/api/users", "/api/profile", "/api/account",
        "/api/v1/user", "/api/v1/users", "/api/me",
        "/user/profile", "/account", "/profile",
    )

    # Field names that commonly control privileges or account state.
    privilege_names = (
        "role", "is_admin", "isAdmin", "admin", "is_superuser", "is_staff",
        "permissions", "group", "level", "type", "verified", "is_verified",
        "email_verified", "active",
    )
    # Keys are compared case-insensitively.
    privileged_lower = {name.lower() for name in privilege_names}

    for path in api_paths:
        try:
            reply = session.get(f"{base}{path}", timeout=8)
            if reply.status_code not in (200, 401) or _is_spa_response(reply, result):
                continue
            # Only a 200 response carries a body worth inspecting.
            if reply.status_code != 200:
                continue

            try:
                payload = reply.json()
                if isinstance(payload, dict):
                    exposed = [key for key in payload if key.lower() in privileged_lower]
                else:
                    exposed = []
                if exposed:
                    listed = ', '.join(exposed)
                    result.add(Finding(
                        "Mass Assignment",
                        f"Privilege fields in {path}: {listed}",
                        Severity.LOW,
                        f"API {path} returns privilege fields: {listed}. "
                        "If PUT/PATCH accepts these fields, mass assignment is possible.",
                        "Use a whitelist of fields the user is allowed to modify.",
                        confidence=Confidence.TENTATIVE,
                    ))
                    break
            except (ValueError, AttributeError):
                pass
        except requests.RequestException:
            pass




def check_password_policy(session: requests.Session, url: str, result: ScanResult):
    """Probe registration endpoints with deliberately weak passwords.

    The first registration path that answers ``GET`` with 200 (and is not an
    SPA fallback page) receives up to three sign-up attempts. A 201/302 reply,
    or a 200 reply whose body contains a success phrase, is reported as an
    accepted weak password and ends the check. Request errors are ignored.
    """
    base = url.rstrip("/")
    reg_paths = (
        "/register", "/signup", "/sign-up", "/registration",
        "/api/register", "/api/signup", "/api/users",
        "/account/register", "/user/register", "/join",
    )
    weak_passwords = (
        ("123456", "very weak password"),
        ("password", "dictionary password"),
        ("a", "1-character password"),
    )
    # Phrases in a 200 response body that are taken as a successful sign-up.
    success_markers = (
        "success", "registered", "welcome", "account created",
        "registration successful",
    )

    for path in reg_paths:
        endpoint = f"{base}{path}"
        try:
            page = session.get(endpoint, timeout=8)
            if page.status_code != 200 or _is_spa_response(page, result):
                continue

            for weak_pw, desc in weak_passwords:
                try:
                    form = {
                        "username": f"qscan_test_{int(time.time())}",
                        "email": f"qscan_{int(time.time())}@test.invalid",
                        "password": weak_pw,
                        "password_confirm": weak_pw,
                        "password2": weak_pw,
                        "confirm_password": weak_pw,
                    }
                    reply = session.post(endpoint, timeout=8, data=form,
                                         allow_redirects=False)
                    if _is_spa_response(reply, result):
                        continue

                    if reply.status_code in (302, 201):
                        advice = "Set minimum password length (8+), require mixed character types."
                    elif reply.status_code == 200:
                        lowered = reply.text.lower()
                        if not any(marker in lowered for marker in success_markers):
                            continue
                        advice = "Strengthen the password policy."
                    else:
                        continue

                    result.add(Finding(
                        "Password Policy",
                        f"Accepted {desc}: '{weak_pw}'", Severity.MEDIUM,
                        f"Registration form {path} accepts {desc}.",
                        advice,
                        confidence=Confidence.TENTATIVE,
                    ))
                    return
                except requests.RequestException:
                    pass

            # Only the first usable registration page is tested.
            break
        except requests.RequestException:
            pass




def check_idor(session: requests.Session, url: str, result: ScanResult):
    """Probe ID-based API routes with a handful of sequential identifiers.

    For each route template the IDs 1, 2, 3, 100 and 999 are requested. When
    at least two of them return 200 with differing bodies and at least one
    response is JSON, a TENTATIVE finding is added and the check ends.
    Templates that hit the SPA fallback page are skipped.
    """
    base = url.rstrip("/")
    idor_paths = [
        "/api/users/{id}", "/api/user/{id}", "/api/v1/users/{id}",
        "/api/orders/{id}", "/api/order/{id}",
        "/api/invoices/{id}", "/api/invoice/{id}",
        "/api/documents/{id}", "/api/files/{id}",
        "/user/{id}", "/profile/{id}",
        "/api/messages/{id}", "/api/notifications/{id}",
    ]
    probe_ids = (1, 2, 3, 100, 999)

    for path_template in idor_paths:
        bodies = {}
        saw_json = False

        for candidate in probe_ids:
            target = base + path_template.replace("{id}", str(candidate))
            try:
                reply = session.get(target, timeout=5)
                if reply.status_code != 200:
                    continue
                if _is_spa_response(reply, result):
                    # SPA catch-all route: abandon this template entirely.
                    break
                if "json" in reply.headers.get("Content-Type", "").lower():
                    saw_json = True
                bodies[candidate] = reply.text
            except requests.RequestException:
                pass
        else:
            # Reached only when no SPA response interrupted the probing.
            # HTML pages on dynamic sites always differ, so JSON is required.
            if saw_json and len(bodies) >= 2 and len(set(bodies.values())) >= 2:
                ids_found = list(bodies)
                result.add(Finding(
                    "IDOR",
                    f"Sequential IDs accessible: {path_template}", Severity.MEDIUM,
                    f"API {path_template} returns data for IDs: {ids_found}. "
                    "If authorization is not checked — IDOR.",
                    "Check authorization when accessing objects by ID. Use UUIDs.",
                    confidence=Confidence.TENTATIVE,
                ))
                return




def _csp_source_host(token: str) -> str:
    """Reduce one CSP token to a bare lowercase host (scheme, port, path removed)."""
    source = token.strip("'\"").lower()
    if "://" in source:
        source = source.split("://", 1)[1]
    return source.split("/", 1)[0].split(":", 1)[0]


def _csp_host_covers(source_host: str, domain: str) -> bool:
    """Return True if a CSP host source is ``domain``, a subdomain of it, or a wildcard covering it."""
    if source_host == domain or source_host.endswith("." + domain):
        return True
    # "*.example.com" in the policy also admits "cdn.example.com".
    return source_host.startswith("*.") and domain.endswith(source_host[1:])


def check_csp_bypass(session: requests.Session, url: str, result: ScanResult):
    """CSP bypass vector analysis.

    Fetches ``url`` and, when a Content-Security-Policy header is present,
    collects known weaknesses: whitelisted hosts that serve JSONP or
    user-uploaded scripts, missing ``base-uri`` / ``object-src``, and
    ``'strict-dynamic'`` combined with ``'unsafe-inline'``. All vectors are
    reported in a single MEDIUM finding.

    Host matching is done on normalised hostnames, so sources written with a
    scheme, port or path (``https://cdn.jsdelivr.net/npm/``) and wildcard
    parents (``*.googleapis.com``) are recognised.

    Args:
        session: HTTP session used for the request.
        url: Page whose CSP header is analysed.
        result: Collector that receives the finding.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    csp = resp.headers.get("Content-Security-Policy", "")
    if not csp:
        return

    bypasses = []

    # Treat ';' as a separator too, so "a.com;img-src b.com" yields clean tokens.
    csp_hosts = {_csp_source_host(token) for token in csp.replace(";", " ").split()}
    csp_hosts.discard("")

    def whitelisted(domain):
        return any(_csp_host_covers(host, domain) for host in csp_hosts)

    # JSONP endpoints in whitelisted domains
    jsonp_domains = [
        "accounts.google.com", "www.google.com", "apis.google.com",
        "www.googleapis.com", "ajax.googleapis.com",
        "cdnjs.cloudflare.com", "cdn.jsdelivr.net",
        "code.jquery.com", "unpkg.com",
        "*.facebook.com", "connect.facebook.net",
    ]
    for domain in jsonp_domains:
        if whitelisted(domain.replace("*.", "")):
            bypasses.append(f"{domain} (JSONP/callback available)")

    # base-uri missing
    if "base-uri" not in csp:
        bypasses.append("missing base-uri (base tag injection)")

    # object-src not 'none'
    if "object-src" not in csp:
        bypasses.append("missing object-src (Flash/PDF XSS)")
    elif "'none'" not in csp.split("object-src")[1].split(";")[0]:
        bypasses.append("object-src not 'none'")

    # script-src with 'strict-dynamic' + nonce/hash
    if "'strict-dynamic'" in csp and "'unsafe-inline'" in csp:
        # strict-dynamic overrides unsafe-inline in supporting browsers
        # but unsafe-inline still works in older ones
        bypasses.append("strict-dynamic + unsafe-inline (legacy browser fallback)")

    # CDN hosting (can upload custom scripts)
    cdn_hosting = ["cdn.jsdelivr.net", "unpkg.com", "raw.githubusercontent.com",
                   "cdnjs.cloudflare.com", "cdn.statically.io"]
    for cdn in cdn_hosting:
        if whitelisted(cdn):
            bypasses.append(f"{cdn} (arbitrary JS upload possible)")

    # Angular in whitelist (CSP bypass via ng-csp)
    if whitelisted("ajax.googleapis.com") or whitelisted("cdnjs.cloudflare.com"):
        if "'unsafe-eval'" not in csp:
            bypasses.append("AngularJS CSP bypass possible via whitelisted CDN")

    if bypasses:
        # The title counts every vector; the detail text lists the first five.
        result.add(Finding("CSP Bypass", f"CSP bypassable ({len(bypasses)} vector(s))",
            Severity.MEDIUM,
            f"Possible CSP bypasses: {'; '.join(bypasses[:5])}.",
            "Use nonce/hash instead of domain whitelist. Add base-uri 'none'.",
            confidence=Confidence.FIRM))




def check_api_auth(session: requests.Session, url: str, result: ScanResult):
    """Unauthenticated API endpoint check.

    Requests a list of common API paths without credentials and reads at most
    the first 2048 bytes of each 200 response. A JSON-looking body is reported
    when the path is one that should require authentication (HIGH, FIRM) or
    when the body contains a sensitive-looking key (MEDIUM, TENTATIVE).

    Args:
        session: HTTP session used for every request.
        url: Base URL of the target; a trailing slash is ignored.
        result: Collector that receives the findings.

    Network and read errors are ignored (best effort).
    """
    # resp.raw.read() bypasses requests' exception wrapping and raises urllib3
    # errors directly. Imported locally because the module itself only pulls
    # Retry from urllib3.
    from urllib3.exceptions import HTTPError as Urllib3Error

    base = url.rstrip("/")
    api_paths = [
        "/api/users", "/api/v1/users", "/api/v2/users",
        "/api/admin", "/api/v1/admin",
        "/api/config", "/api/settings", "/api/v1/config",
        "/api/logs", "/api/audit", "/api/events",
        "/api/database", "/api/backup", "/api/export",
        "/api/tokens", "/api/keys", "/api/secrets",
        "/api/internal", "/api/debug", "/api/system",
        "/api/health", "/api/status", "/api/info",
        "/api/metrics", "/api/stats",
        "/api/upload", "/api/files", "/api/documents",
        "/api/payments", "/api/billing",
        "/api/notifications", "/api/messages",
    ]

    # Paths that should typically require authentication
    sensitive = {"/api/admin", "/api/v1/admin", "/api/config", "/api/settings",
                "/api/v1/config", "/api/logs", "/api/audit", "/api/database",
                "/api/backup", "/api/export", "/api/tokens", "/api/keys",
                "/api/secrets", "/api/internal", "/api/debug", "/api/system"}

    # Paths that are commonly public and should not be flagged
    public_paths = {"/api/health", "/api/status", "/api/info",
                    "/api/metrics", "/api/stats"}

    # Keywords in JSON response that suggest sensitive data exposure
    sensitive_data_keywords = [b'"password"', b'"token"', b'"secret"', b'"api_key"',
                               b'"email"', b'"user"', b'"admin"', b'"role"',
                               b'"session"', b'"credit_card"', b'"ssn"',
                               b'"private"', b'"credential"']

    for path in api_paths:
        if path in public_paths:
            continue
        try:
            # The context manager closes the streamed response on every exit
            # path, including an exception raised while reading.
            with session.get(f"{base}{path}", timeout=5, stream=True) as resp:
                if resp.status_code != 200:
                    continue
                chunk = resp.raw.read(2048, decode_content=True)
        except (requests.RequestException, Urllib3Error):
            continue

        # Require a non-trivial body that starts like a JSON object or array.
        if len(chunk) <= 50 or chunk.lstrip()[:1] not in (b'{', b'['):
            continue

        chunk_lower = chunk.lower()
        has_sensitive_data = any(kw in chunk_lower for kw in sensitive_data_keywords)
        is_sensitive_path = path in sensitive
        # Only report if the path should require auth OR response contains sensitive data
        if is_sensitive_path or has_sensitive_data:
            sev = Severity.HIGH if is_sensitive_path else Severity.MEDIUM
            result.add(Finding("Unauthenticated API",
                f"{path} accessible", sev,
                f"API endpoint {path} returns JSON without authentication.",
                f"Add authentication for {path}.",
                confidence=Confidence.FIRM if is_sensitive_path else Confidence.TENTATIVE))




def check_source_leak(session: requests.Session, url: str, result: ScanResult):
    """Request well-known source/config file locations and report exposed ones.

    A path is reported when it answers 200 (redirects are not followed) with
    more than 20 bytes, is not the SPA fallback page, and the first 2000
    characters contain either a secret-style keyword (CRITICAL) or a
    code/config-style keyword (HIGH). Request errors are ignored.
    """
    base = url.rstrip("/")
    source_paths = (
        ("/source", "Source code"),
        ("/src", "Source directory"),
        ("/.svn/wc.db", "SVN database"),
        ("/.hg/store/data", "Mercurial data"),
        ("/CVS/Root", "CVS Root"),
        ("/CVS/Entries", "CVS Entries"),
        ("/WEB-INF/web.xml", "Java web.xml"),
        ("/WEB-INF/classes/", "Java classes"),
        ("/META-INF/MANIFEST.MF", "Java manifest"),
        ("/_wpeprivate/config.json", "WP Engine config"),
        ("/app/config/parameters.yml", "Symfony parameters"),
        ("/config/database.yml", "Rails DB config"),
        ("/conf/server.xml", "Tomcat server.xml"),
        ("/application.properties", "Spring properties"),
        ("/application.yml", "Spring YAML config"),
    )

    # Words that point at actual secrets in the content.
    secret_keywords = ("password", "secret", "token", "api_key", "apikey",
                       "private_key", "aws_access", "credentials")
    # Words that point at source code or config structure rather than secrets.
    code_keywords = ("<?php", "<?xml", "import ", "from ", "class ",
                     "database", "server", "host", "port", "driver", "key=")

    for path, desc in source_paths:
        try:
            reply = session.get(f"{base}{path}", timeout=5, allow_redirects=False)
            if reply.status_code != 200 or len(reply.content) <= 20:
                continue
            if _is_spa_response(reply, result):
                continue

            snippet = reply.text[:2000].lower()
            # Secrets outrank plain code/config exposure.
            if any(word in snippet for word in secret_keywords):
                severity = Severity.CRITICAL
                detail_suffix = "Contains sensitive credentials."
            elif any(word in snippet for word in code_keywords):
                severity = Severity.HIGH
                detail_suffix = "Source/config file exposed."
            else:
                continue

            result.add(Finding(
                "Source Code Leak", f"Leak: {path} ({desc})",
                severity,
                f"{desc} publicly accessible ({len(reply.content)} bytes). "
                f"{detail_suffix}",
                f"Immediately restrict access to {path}.",
                confidence=Confidence.FIRM,
            ))
        except requests.RequestException:
            pass




def check_ip_spoofing(session: requests.Session, url: str, result: ScanResult):
    """Check whether client-IP headers change how the server responds.

    ``url`` is fetched twice: once normally and once with the common
    client-IP headers all set to 127.0.0.1. A different status code is
    reported with FIRM confidence; the same status with a body length that
    differs by more than 25% of the normal response is reported as TENTATIVE.
    Request errors are ignored.
    """
    loopback_headers = dict.fromkeys(
        ("X-Forwarded-For", "X-Real-IP", "X-Client-IP",
         "CF-Connecting-IP", "True-Client-IP"),
        "127.0.0.1",
    )

    try:
        plain = session.get(url, timeout=8)
        spoofed = session.get(url, timeout=8, headers=loopback_headers)

        plain_len = len(plain.text)
        spoofed_len = len(spoofed.text)
        # Relative size change, measured against the normal response.
        relative_change = abs(plain_len - spoofed_len) / max(plain_len, 1)

        if plain.status_code != spoofed.status_code:
            # A status change (e.g. 403 -> 200) is the stronger signal.
            detail = (
                f"Status code changed from {plain.status_code} to {spoofed.status_code} "
                "when spoofing IP via X-Forwarded-For/X-Real-IP headers."
            )
            certainty = Confidence.FIRM
        elif relative_change > 0.25:
            # Same status, but the body length moved by more than a quarter.
            detail = (
                f"Response size changed significantly ({plain_len} -> {spoofed_len} bytes) "
                "when spoofing IP via X-Forwarded-For/X-Real-IP headers."
            )
            certainty = Confidence.TENTATIVE
        else:
            return

        result.add(Finding(
            "IP Spoofing",
            "Server trusts X-Forwarded-For", Severity.MEDIUM,
            detail,
            "Do not trust client IP headers. Use IP from the last trusted proxy.",
            confidence=certainty,
        ))
    except requests.RequestException:
        pass




def check_certificate_transparency(session: requests.Session, url: str, result: ScanResult):
    """Query crt.sh for certificate transparency logs to discover subdomains.

    Adds an INFO finding listing the names found (first 20 as evidence) and,
    when more than 50 distinct names exist, a LOW "large attack surface"
    finding.

    Args:
        session: HTTP session used for the crt.sh request.
        url: Target URL; only its hostname is used.
        result: Collector that receives the findings.

    Network errors, non-200 replies and malformed payloads are ignored.
    """
    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname
    if not domain:
        return

    try:
        # Passing the query via params percent-encodes the SQL wildcard '%'.
        resp = session.get("https://crt.sh/",
            params={"q": f"%.{domain}", "output": "json"}, timeout=10)
        if resp.status_code != 200:
            return
        entries = resp.json()
    except (requests.RequestException, ValueError):
        # ValueError covers every JSON decode error flavour.
        return

    if not isinstance(entries, list):
        return

    dotted_suffix = "." + domain
    subdomains = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        # name_value holds one or more newline-separated names.
        for raw_name in str(entry.get("name_value") or "").split("\n"):
            name = raw_name.strip().lower()
            while name.startswith("*."):
                name = name[2:]
            # Require a label boundary so "notexample.com" is not counted
            # as belonging to "example.com".
            if name == domain or name.endswith(dotted_suffix):
                subdomains.add(name)

    if not subdomains:
        return

    count = len(subdomains)
    evidence = ", ".join(sorted(subdomains)[:20])
    if count > 20:
        evidence += f" ... and {count - 20} more"
    result.add(Finding("Certificate Transparency",
        f"{count} subdomains found via CT logs", Severity.INFO,
        f"Certificate Transparency logs reveal {count} subdomains for {domain}.",
        evidence=evidence,
        confidence=Confidence.CONFIRMED))
    if count > 50:
        result.add(Finding("Certificate Transparency",
            "Large attack surface", Severity.LOW,
            f"Over 50 subdomains ({count}) found — large attack surface.",
            "Audit all subdomains and decommission unused ones.",
            confidence=Confidence.FIRM))


def check_cors_preflight(session: requests.Session, url: str, result: ScanResult):
    """Send a CORS preflight from a foreign origin and inspect the reply.

    Findings are only produced when the response sets
    Access-Control-Allow-Credentials to true: HIGH if the foreign origin is
    reflected back, MEDIUM if the allowed origin is "null". Request errors
    are ignored.
    """
    preflight_headers = {
        "Origin": "https://evil.com",
        "Access-Control-Request-Method": "POST",
        "Access-Control-Request-Headers": "Content-Type",
    }

    try:
        reply = session.request("OPTIONS", url, headers=preflight_headers, timeout=10)
        acao = reply.headers.get("Access-Control-Allow-Origin", "")
        acac = reply.headers.get("Access-Control-Allow-Credentials", "").lower()

        # Without credentials neither case below is reported.
        if acac != "true":
            return

        if acao == "https://evil.com":
            result.add(Finding(
                "CORS Preflight",
                "Preflight reflects Origin with credentials", Severity.HIGH,
                "OPTIONS preflight reflects arbitrary Origin with Allow-Credentials: true. "
                "Attacker can make credentialed cross-origin requests.",
                "Whitelist allowed origins explicitly. Never reflect arbitrary origins with credentials.",
                evidence=f"Origin: https://evil.com -> ACAO: {acao} | ACAC: {acac}",
                confidence=Confidence.FIRM,
            ))
        elif acao == "null":
            result.add(Finding(
                "CORS Preflight",
                "Preflight allows null Origin with credentials", Severity.MEDIUM,
                "OPTIONS preflight allows null Origin with Allow-Credentials: true. "
                "Bypassable via sandboxed iframe.",
                "Do not allow null origin with credentials.",
                evidence=f"Origin: null allowed | ACAC: {acac}",
                confidence=Confidence.FIRM,
            ))
    except requests.RequestException:
        pass


def check_cookie_flags(session: requests.Session, url: str, result: ScanResult):
    """Check Set-Cookie flags: Secure, HttpOnly, SameSite.

    Fetches ``url`` and inspects every cookie the response sets:

    * cookies whose name contains a session-like keyword must carry ``Secure``
      and ``HttpOnly`` (MEDIUM finding for each missing flag);
    * every cookie should carry a ``SameSite`` attribute (LOW finding).

    Attributes are parsed from the ``;``-separated attribute list that follows
    the ``name=value`` pair, so a cookie name, value or path that merely
    contains the text "secure"/"httponly"/"samesite" does not hide a missing
    flag. Request failures and responses without cookies produce no findings.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    set_cookies = resp.headers.get("Set-Cookie", "")
    if not set_cookies:
        return

    # Prefer the raw header object, which can hand back each Set-Cookie header
    # separately. Otherwise split the combined header value heuristically.
    cookie_strings = []
    try:
        raw_hdrs = resp.raw.headers
        if hasattr(raw_hdrs, "getlist"):
            cookie_strings = raw_hdrs.getlist("Set-Cookie")
    except Exception:
        cookie_strings = []
    if not cookie_strings:
        cookie_strings = _split_set_cookie_header(set_cookies)

    session_names = {"sessionid", "phpsessid", "jsessionid", "token", "session",
                     "sid", "csrf", "csrftoken", "auth", "jwt", "access_token"}

    for cookie_str in cookie_strings:
        name_val, *attr_parts = cookie_str.split(";")
        name_val = name_val.strip()
        name = name_val.split("=")[0].strip().lower() if "=" in name_val else ""
        # Attribute names only (text before any "="), case-insensitive.
        attrs = {part.split("=", 1)[0].strip().lower() for part in attr_parts}
        # Heuristic: a cookie is session-like if its name contains a keyword.
        is_session = any(sn in name for sn in session_names)

        if is_session and "secure" not in attrs:
            result.add(Finding("Cookie Flags",
                f"Cookie '{name}' missing Secure flag", Severity.MEDIUM,
                f"Session cookie '{name}' can be sent over unencrypted HTTP.",
                "Add Secure flag to all session cookies.",
                evidence=cookie_str[:200],
                confidence=Confidence.CONFIRMED))

        if is_session and "httponly" not in attrs:
            result.add(Finding("Cookie Flags",
                f"Cookie '{name}' missing HttpOnly flag", Severity.MEDIUM,
                f"Session cookie '{name}' accessible via JavaScript (XSS risk).",
                "Add HttpOnly flag to session cookies.",
                evidence=cookie_str[:200],
                confidence=Confidence.CONFIRMED))

        if "samesite" not in attrs:
            result.add(Finding("Cookie Flags",
                f"Cookie '{name}' missing SameSite attribute", Severity.LOW,
                f"Cookie '{name}' has no SameSite attribute (CSRF risk).",
                "Add SameSite=Lax or SameSite=Strict.",
                evidence=cookie_str[:200],
                confidence=Confidence.CONFIRMED))


def _split_set_cookie_header(header_value: str) -> list:
    """Split a comma-joined Set-Cookie value into individual cookie strings.

    A comma starts a new cookie only when it is followed by ``name=``; this
    keeps the comma inside ``Expires=Wed, 21 Oct ...`` intact.
    """
    pieces = re.split(r",\s*(?=[^;,=\s]+=)", header_value)
    return [piece.strip() for piece in pieces if piece.strip()]


def check_mixed_content(session: requests.Session, url: str, result: ScanResult):
    """Report a plain-HTTP site, or HTTP sub-resources embedded in an HTTPS page.

    For a non-HTTPS ``url`` a single HIGH finding is recorded and nothing is
    fetched. Otherwise the page is downloaded and scanned with regular
    expressions for scripts, stylesheets (both attribute orders) and iframes
    whose URL starts with ``http://``. One finding is added per match.
    """
    if urllib.parse.urlparse(url).scheme != "https":
        result.add(Finding("Mixed Content",
            "Site served over plain HTTP", Severity.HIGH,
            "The entire site is served over unencrypted HTTP. "
            "All traffic (including credentials and personal data) can be intercepted.",
            "Enable HTTPS with a valid TLS certificate (e.g. Let's Encrypt). "
            "Redirect all HTTP traffic to HTTPS.",
            confidence=Confidence.CONFIRMED))
        return

    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return
    page = resp.text

    # (pattern, title, severity, description, recommendation), scanned in
    # this order.
    probes = (
        (r'<script[^>]+src=["\']http://([^"\']+)["\']',
         "Script loaded over HTTP", Severity.MEDIUM,
         "JavaScript loaded over insecure HTTP on HTTPS page.",
         "Load all scripts over HTTPS."),
        (r'<link[^>]+href=["\']http://([^"\']+)["\'][^>]*rel=["\']stylesheet["\']',
         "Stylesheet loaded over HTTP", Severity.LOW,
         "CSS loaded over insecure HTTP on HTTPS page.",
         "Load all stylesheets over HTTPS."),
        (r'<link[^>]+rel=["\']stylesheet["\'][^>]*href=["\']http://([^"\']+)["\']',
         "Stylesheet loaded over HTTP", Severity.LOW,
         "CSS loaded over insecure HTTP on HTTPS page.",
         "Load all stylesheets over HTTPS."),
        (r'<iframe[^>]+src=["\']http://([^"\']+)["\']',
         "Iframe loaded over HTTP", Severity.MEDIUM,
         "Iframe loaded over insecure HTTP on HTTPS page.",
         "Load all iframes over HTTPS."),
    )

    for pattern, title, severity, description, remedy in probes:
        for hit in re.finditer(pattern, page, re.I):
            result.add(Finding("Mixed Content",
                title, severity,
                description,
                remedy,
                evidence=f"http://{hit.group(1)}",
                confidence=Confidence.CONFIRMED))


def check_dangling_markup(session: requests.Session, url: str, result: ScanResult):
    """Look for markup that sends relative URLs or form data to a foreign host.

    Two patterns are reported, both as tentative MEDIUM findings:

    * a ``<base href>`` whose host differs from the scanned host;
    * a ``<form action>`` that uses ``http://`` and targets a different host.

    Nothing is reported when the page cannot be fetched.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    html = resp.text
    own_domain = urllib.parse.urlparse(url).hostname or ""

    # <base href> pointing to another host
    for base_href in re.findall(r'<base[^>]+href=["\']([^"\']+)["\']', html, re.I):
        base_host = urllib.parse.urlparse(base_href).hostname
        if not base_host or base_host == own_domain:
            continue
        result.add(Finding("Dangling Markup",
            "External <base> tag detected", Severity.MEDIUM,
            f"<base href> points to external domain '{base_host}'. "
            "All relative URLs on the page will resolve to the external domain.",
            "Remove external <base> tags or ensure they point to your domain.",
            evidence=f"<base href=\"{base_href}\">",
            confidence=Confidence.TENTATIVE))

    # Forms submitting over plain HTTP to another host
    for action in re.findall(r'<form[^>]+action=["\']([^"\']+)["\']', html, re.I):
        target = urllib.parse.urlparse(action)
        if target.scheme != "http":
            continue
        if not target.hostname or target.hostname == own_domain:
            continue
        result.add(Finding("Dangling Markup",
            "Form posts to external HTTP", Severity.MEDIUM,
            "Form action points to external HTTP URL.",
            "Use HTTPS for form actions and avoid posting to external domains.",
            evidence=f"action=\"{action[:200]}\"",
            confidence=Confidence.TENTATIVE))


def check_bucket_bruteforce(session: requests.Session, url: str, result: ScanResult):
    """Probe common cloud storage bucket names derived from the target domain.

    Candidate names are the first label of the host (after dropping a leading
    ``www.``) combined with a fixed list of suffixes. Each candidate is
    requested on AWS S3 and Google Cloud Storage.

    Every bucket with an open listing is reported, graded by
    ``_bucket_looks_related``:

    * listing appears related to the target: HIGH severity, FIRM confidence;
    * otherwise: LOW severity, TENTATIVE confidence, with a note that the
      bucket may not belong to this site (unrelated public buckets often share
      a common name prefix, e.g. 'scan-dev' for scan.qpus.su).

    If no candidate is open, a single INFO finding records that the check ran.
    Hosts whose first label is shorter than 4 characters or is a generic word
    are skipped entirely. Per-request network errors are ignored.
    """
    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname
    if not domain:
        return

    # Strip only a *leading* "www."; str.replace() would also rewrite hosts
    # that merely contain it (e.g. "awww.example.com" -> "aexample.com").
    domain_clean = domain[4:] if domain.startswith("www.") else domain
    labels = domain_clean.split(".")
    # Base name, e.g. "example" from "www.example.com"
    base_name = labels[0]

    # Skip very short or too-generic names — they produce only false positives
    GENERIC_NAMES = {
        "www", "app", "api", "web", "cdn", "dev", "test", "data", "scan",
        "mail", "ftp", "docs", "blog", "shop", "admin", "info", "my",
        "static", "media", "files", "assets", "img", "images", "video",
        "public", "private", "backup", "staging", "prod", "demo", "beta",
        "portal", "home", "site", "page", "host", "server", "cloud",
        "store", "storage", "download", "uploads", "content", "ns",
    }
    if len(base_name) < 4 or base_name.lower() in GENERIC_NAMES:
        return

    # Host without its last label, passed to the relatedness check
    # (e.g. "app.mycompany" for app.mycompany.com).
    domain_no_tld = ".".join(labels[:-1]) if len(labels) > 1 else domain_clean

    suffixes = ["", "-assets", "-backup", "-dev", "-staging",
                "-uploads", "-static", "-public", "-data"]

    found_open = False

    for suffix in suffixes:
        name = base_name + suffix

        # Check S3
        try:
            r = session.get(f"https://{name}.s3.amazonaws.com/", timeout=5)
            if "<ListBucketResult" in r.text:
                related = _bucket_looks_related(r.text, domain_no_tld, base_name)
                desc_extra = "" if related else " Bucket may not belong to this site."
                found_open = True
                result.add(Finding("Cloud Bucket",
                    f"S3 bucket '{name}' listing open",
                    Severity.HIGH if related else Severity.LOW,
                    f"AWS S3 bucket '{name}' allows public listing of files.{desc_extra}",
                    "Restrict bucket ACL and disable public listing.",
                    evidence=f"https://{name}.s3.amazonaws.com/",
                    confidence=Confidence.FIRM if related else Confidence.TENTATIVE))
        except requests.RequestException:
            pass

        # Check GCS
        try:
            r = session.get(f"https://storage.googleapis.com/{name}/", timeout=5)
            if r.status_code == 200 and ("<ListBucketResult" in r.text or "Contents" in r.text):
                related = _bucket_looks_related(r.text, domain_no_tld, base_name)
                desc_extra = "" if related else " Bucket may not belong to this site."
                found_open = True
                result.add(Finding("Cloud Bucket",
                    f"GCS bucket '{name}' listing open",
                    Severity.HIGH if related else Severity.LOW,
                    f"Google Cloud Storage bucket '{name}' allows public listing.{desc_extra}",
                    "Restrict bucket permissions.",
                    evidence=f"https://storage.googleapis.com/{name}/",
                    confidence=Confidence.FIRM if related else Confidence.TENTATIVE))
        except requests.RequestException:
            pass

    if not found_open:
        result.add(Finding("Cloud Bucket",
            "No open cloud buckets found", Severity.INFO,
            f"Checked S3/GCS buckets with prefix '{base_name}' — none allow public listing.",
            confidence=Confidence.CONFIRMED))


def _bucket_looks_related(listing_xml: str, domain_no_tld: str, base_name: str) -> bool:
    """Check if bucket listing XML contains references to the target domain.

    Returns True if file keys in the listing mention the domain or base name
    in a meaningful way, suggesting the bucket belongs to the scanned site.
    """
    text_lower = listing_xml.lower()
    # Look for domain references in <Key> tags (file paths inside bucket)
    domain_parts = domain_no_tld.lower().split(".")
    for part in domain_parts:
        if len(part) >= 4 and part in text_lower:
            return True
    return False


def check_favicon_fingerprint(session: requests.Session, url: str, result: ScanResult):
    """Identify the server technology from the MD5 of ``/favicon.ico``.

    The icon is fetched relative to ``url`` (trailing slashes removed). When
    the response is 200 with at least 10 bytes and its MD5 digest appears in
    the built-in table, a tentative INFO finding names the matching
    technology. Request failures are ignored.
    """
    import hashlib

    known_hashes = {
        "4644f2d45601037b8423d45e13194c93": "Apache Default",
        "d41d8cd98f00b204e9800998ecf8427e": "Empty file",
        "2cc72f9c1bc4944e3fa060ac313aee07": "Spring Boot",
        "a3559e23c795d5e38e9e4556b083e893": "WordPress",
        "71e30c507ca3fa005e2d1322a5aa8fb4": "Drupal",
        "3ca02c3514e68b94f33cdb3d2b488019": "Tomcat",
        "f276b19aabcb4ae8cda4d22625c6735f": "Django",
        "56f31a8e19d3697bfb5e7e2a9a2b6c31": "Laravel",
        "d4a1bd6b63ab61cf83a72b4233c5a57b": "Nginx Default",
        "a5b1f2a9fbc8b01e7abcdef012345678": "IIS Default",
    }

    icon_url = f"{url.rstrip('/')}/favicon.ico"
    try:
        resp = session.get(icon_url, timeout=8)
        # A non-200 answer is treated like an empty icon and rejected below.
        icon_bytes = resp.content if resp.status_code == 200 else b""
    except requests.RequestException:
        return

    if len(icon_bytes) < 10:
        return

    digest = hashlib.md5(icon_bytes).hexdigest()
    tech = known_hashes.get(digest)
    if tech is None:
        return

    result.add(Finding("Favicon Fingerprint",
        f"Favicon matches {tech}", Severity.INFO,
        f"The favicon hash matches known {tech} default favicon.",
        "Use a custom favicon to avoid technology fingerprinting.",
        evidence=f"MD5: {digest}",
        confidence=Confidence.TENTATIVE))


def check_dns_caa(url: str, result: ScanResult):
    """Check DNS CAA records for the scanned host.

    The CAA policy that applies to a name is the first CAA record set found
    while climbing from that name towards the DNS root (RFC 8659, section 3).
    The host itself is queried first, then each parent name, so a host such as
    ``www.example.com`` is not reported as unprotected when the policy is
    published on ``example.com``.

    Findings:
        * INFO when a CAA record set is found at the host or any parent;
        * LOW when no name on the path has a CAA record;
        * INFO when a DNS query times out.

    Does nothing when dnspython is not installed or the URL has no host. Other
    resolver errors are ignored (best effort).
    """
    if not HAS_DNS:
        return
    domain = urllib.parse.urlparse(url).hostname
    if not domain:
        return

    labels = domain.rstrip(".").split(".")
    # "www.example.com" -> ["www.example.com", "example.com", "com"]
    candidates = [".".join(labels[i:]) for i in range(len(labels))]

    try:
        issuers = []
        for name in candidates:
            try:
                answers = dns.resolver.resolve(name, "CAA")
            except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN,
                    dns.resolver.NoNameservers):
                # Nothing usable at this level — try the parent name.
                continue
            issuers = [str(rdata) for rdata in answers]
            if issuers:
                break

        if issuers:
            result.add(Finding("DNS CAA",
                "CAA record restricts certificate issuance", Severity.INFO,
                f"CAA records found: {', '.join(issuers[:5])}.",
                evidence="; ".join(issuers[:10]),
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("DNS CAA",
                "No CAA record", Severity.LOW,
                "No CAA DNS record found. Any CA can issue certificates for this domain.",
                "Add a CAA record to restrict which CAs can issue certificates.",
                confidence=Confidence.CONFIRMED))
    except dns.exception.Timeout:
        # DNS timeout — still report as unknown
        result.add(Finding("DNS CAA",
            "CAA record check timed out", Severity.INFO,
            "DNS query for CAA records timed out. Unable to verify.",
            "Ensure DNS is properly configured and responds to CAA queries.",
            confidence=Confidence.CONFIRMED))
    except Exception:
        # Best effort: any other resolver failure must not abort the scan.
        pass


def check_http3(session: requests.Session, url: str, result: ScanResult):
    """Record an INFO finding when the ``Alt-Svc`` header mentions ``h3``.

    Request failures and responses without an ``h3`` entry produce nothing.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    advertised = resp.headers.get("Alt-Svc", "")
    if "h3" not in advertised:
        return

    result.add(Finding("HTTP/3",
        "HTTP/3 supported", Severity.INFO,
        "Server advertises HTTP/3 support via Alt-Svc header.",
        evidence=f"Alt-Svc: {advertised[:200]}",
        confidence=Confidence.CONFIRMED))


def check_deprecated_tls(url: str, result: ScanResult):
    """Try to complete a handshake restricted to TLS 1.0, then to TLS 1.1.

    For each legacy version a client context is pinned to exactly that version
    (certificate and hostname verification disabled) and connected to the
    host. The port comes from the URL, defaulting to 443. A completed
    handshake yields a MEDIUM finding. Any SSL, socket or OS error, including
    the local library refusing the version, is treated as "not accepted".
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    port = parsed.port or 443
    if not host:
        return

    legacy_versions = (
        ("TLS 1.0", ssl.TLSVersion.TLSv1),
        ("TLS 1.1", ssl.TLSVersion.TLSv1_1),
    )

    for label, pinned in legacy_versions:
        try:
            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            # We only care whether the protocol version is negotiated.
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            # Pin both bounds so no other version can be negotiated.
            ctx.maximum_version = pinned
            ctx.minimum_version = pinned
            with socket.create_connection((host, port), timeout=5) as raw_sock, \
                    ctx.wrap_socket(raw_sock, server_hostname=host) as tls_sock:
                result.add(Finding("Deprecated TLS",
                    f"{label} accepted", Severity.MEDIUM,
                    f"Server accepts {label}, which is deprecated and insecure.",
                    f"Disable {label} on the server. Only allow TLS 1.2+.",
                    evidence=f"Negotiated: {tls_sock.version()}",
                    confidence=Confidence.CONFIRMED))
        except (ssl.SSLError, socket.error, OSError, AttributeError):
            continue


def check_hsts_preload(session: requests.Session, url: str, result: ScanResult):
    """Check HSTS preload eligibility and status.

    Steps:
        1. Fetch ``url`` and read ``Strict-Transport-Security``. If the header
           is absent nothing is reported here.
        2. Ask hstspreload.org whether the host is already preloaded; if so,
           record an INFO finding and stop. Failures of this lookup are
           ignored.
        3. Otherwise report either "eligible but not submitted" (LOW) when the
           header has ``includeSubDomains``, ``preload`` and
           ``max-age >= 31536000``, or "not eligible" (INFO) listing the
           missing requirements.

    Directive names are matched case-insensitively, and ``max-age`` may be
    written with optional whitespace or as a quoted string (RFC 6797,
    section 6.1).
    """
    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname
    if not domain:
        return
    try:
        resp = session.get(url, timeout=10)
        hsts = resp.headers.get("Strict-Transport-Security", "")
        if not hsts:
            return  # No HSTS at all — already covered by check_headers

        hsts_lower = hsts.lower()
        has_include_sub = "includesubdomains" in hsts_lower
        has_preload = "preload" in hsts_lower
        # Search the lower-cased header so "Max-Age=..." is recognised too;
        # the optional quote accepts max-age="31536000".
        max_age_match = re.search(r'max-age\s*=\s*"?(\d+)', hsts_lower)
        max_age = int(max_age_match.group(1)) if max_age_match else 0

        # Check hstspreload.org API
        try:
            preload_resp = session.get(
                f"https://hstspreload.org/api/v2/status?domain={domain}", timeout=8)
            if preload_resp.status_code == 200:
                data = preload_resp.json()
                status = data.get("status", "unknown")
                if status == "preloaded":
                    result.add(Finding("HSTS Preload",
                        "Domain in HSTS preload list", Severity.INFO,
                        f"Domain '{domain}' is in the HSTS preload list.",
                        evidence=f"Status: {status}",
                        confidence=Confidence.CONFIRMED))
                    return
        except (requests.RequestException, json.JSONDecodeError):
            # The status lookup is optional; fall through to the header check.
            pass

        if has_include_sub and has_preload and max_age >= 31536000:
            result.add(Finding("HSTS Preload",
                "Eligible for HSTS preload but not submitted", Severity.LOW,
                "HSTS header is eligible for preload but domain is not in the preload list.",
                "Submit domain to hstspreload.org.",
                evidence=f"HSTS: {hsts[:200]}",
                confidence=Confidence.CONFIRMED))
        else:
            missing = []
            if not has_include_sub:
                missing.append("includeSubDomains")
            if not has_preload:
                missing.append("preload")
            if max_age < 31536000:
                missing.append(f"max-age >= 31536000 (current: {max_age})")
            result.add(Finding("HSTS Preload",
                "Not eligible for HSTS preload", Severity.INFO,
                f"HSTS header missing requirements for preload: {', '.join(missing)}.",
                evidence=f"HSTS: {hsts[:200]}",
                confidence=Confidence.CONFIRMED))
    except requests.RequestException:
        pass


def check_sri(session: requests.Session, url: str, result: ScanResult):
    """Count external scripts and stylesheets that have no ``integrity`` attribute.

    The page is scanned with regular expressions for ``<script src>`` and
    ``<link href>`` tags (links only when the tag contains a quoted
    ``rel="stylesheet"``). Resources hosted on another host and lacking
    ``integrity=`` are counted. A single LOW finding lists the affected hosts
    when at least one is found.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    html = resp.text
    own_domain = urllib.parse.urlparse(url).hostname or ""

    def hosts_without_sri(pattern, stylesheet_only):
        """Return the external host of every matching tag lacking integrity."""
        hosts = []
        for tag_match in re.finditer(pattern, html, re.I):
            tag_lower = tag_match.group(0).lower()
            if stylesheet_only and not (
                    'rel="stylesheet"' in tag_lower or "rel='stylesheet'" in tag_lower):
                continue
            host = urllib.parse.urlparse(tag_match.group(1)).hostname
            if not host or host == own_domain:
                continue
            if "integrity=" in tag_lower:
                continue
            hosts.append(host)
        return hosts

    scripts_no_sri = hosts_without_sri(
        r'<script[^>]+src=["\']([^"\']+)["\'][^>]*>', False)
    styles_no_sri = hosts_without_sri(
        r'<link[^>]+href=["\']([^"\']+)["\'][^>]*>', True)

    total = len(scripts_no_sri) + len(styles_no_sri)
    if total <= 0:
        return

    domains = sorted({*scripts_no_sri, *styles_no_sri})
    result.add(Finding("Subresource Integrity",
        f"{total} external resources without SRI", Severity.LOW,
        f"{total} external scripts/styles lack integrity attribute. "
        "A compromised CDN could inject malicious code.",
        "Add integrity and crossorigin attributes to external resources.",
        evidence=f"Domains: {', '.join(domains[:10])}",
        confidence=Confidence.FIRM))


def check_meta_refresh(session: requests.Session, url: str, result: ScanResult):
    """Inspect ``<meta http-equiv="refresh">`` redirects on the page.

    Both attribute orders (``http-equiv`` before ``content`` and the reverse)
    are scanned, in that order. For each tag whose content carries ``url=``:

    * an ``http://`` target on an HTTPS page is a MEDIUM downgrade finding;
    * otherwise a target on a different host is a LOW finding.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    html = resp.text
    parsed = urllib.parse.urlparse(url)
    own_domain = parsed.hostname or ""

    tag_patterns = (
        # http-equiv first, then content
        r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\']([^"\']+)["\']',
        # content="...;url=..." variant with content before http-equiv
        r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+http-equiv=["\']refresh["\']',
    )

    for tag_pattern in tag_patterns:
        for tag in re.finditer(tag_pattern, html, re.I):
            target = re.search(r'url=(.+)', tag.group(1), re.I)
            if target is None:
                continue

            redirect_url = target.group(1).strip().rstrip("'\"")
            redirect_parsed = urllib.parse.urlparse(redirect_url)
            downgrade = redirect_parsed.scheme == "http" and parsed.scheme == "https"

            if downgrade:
                result.add(Finding("Meta Refresh",
                    "Meta refresh downgrades to HTTP", Severity.MEDIUM,
                    "Meta refresh tag redirects from HTTPS to HTTP.",
                    "Use HTTPS for meta refresh redirect URLs.",
                    evidence=f"<meta refresh> -> {redirect_url[:200]}",
                    confidence=Confidence.CONFIRMED))
            elif redirect_parsed.hostname and redirect_parsed.hostname != own_domain:
                result.add(Finding("Meta Refresh",
                    "Meta refresh redirect to external site", Severity.LOW,
                    f"Meta refresh redirects to external domain '{redirect_parsed.hostname}'.",
                    "Verify external redirect is intentional.",
                    evidence=f"<meta refresh> -> {redirect_url[:200]}",
                    confidence=Confidence.CONFIRMED))


def check_sitemap(session: requests.Session, url: str, result: ScanResult):
    """Fetch the sitemap and flag entries that look like internal paths.

    ``/sitemap.xml`` and ``/sitemap_index.xml`` are tried in order. The first
    one that returns 200, starts like an XML document, is not flagged by
    ``_is_spa_response`` and contains at least one ``<loc>`` entry is analysed,
    after which the check stops. A LOW finding is recorded when any URL
    contains ``/<keyword>`` for one of the sensitive keywords, otherwise an
    INFO finding with the URL count.
    """
    site_root = url.rstrip("/")
    sensitive_markers = ["/" + kw for kw in (
        "admin", "api", "internal", "staging", "test", "debug",
        "backup", "dev", "private", "secret")]

    for path in ("/sitemap.xml", "/sitemap_index.xml"):
        try:
            resp = session.get(f"{site_root}{path}", timeout=8)
            looks_like_xml = resp.status_code == 200 and "<?xml" in resp.text[:100]
            if not looks_like_xml:
                continue
            if _is_spa_response(resp, result):
                continue

            locations = re.findall(r'<loc>([^<]+)</loc>', resp.text)
            count = len(locations)
            if not count:
                continue

            flagged = [
                loc for loc in locations
                if any(marker in loc.lower() for marker in sensitive_markers)
            ]

            if flagged:
                result.add(Finding("Sitemap",
                    "Sitemap with interesting paths", Severity.LOW,
                    f"Sitemap at {path} contains {count} URLs including potentially sensitive paths.",
                    "Review sitemap entries and remove internal/staging URLs.",
                    evidence=", ".join(flagged[:5]),
                    confidence=Confidence.CONFIRMED))
            else:
                result.add(Finding("Sitemap",
                    f"Sitemap found with {count} URLs", Severity.INFO,
                    f"Sitemap found at {path} with {count} URLs.",
                    evidence=path,
                    confidence=Confidence.CONFIRMED))
            # The first usable sitemap is enough.
            return
        except requests.RequestException:
            continue


def check_deprecated_html(session: requests.Session, url: str, result: ScanResult):
    """Detect frames, applets and Flash embeds in the page markup.

    Each element kind is reported at most once. All detected kinds are
    combined into a single LOW finding.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    html = resp.text

    # (regex, label shown in the finding), evaluated in this order.
    detectors = (
        (r'<frame[\s>]', "<frame>"),
        (r'<frameset[\s>]', "<frameset>"),
        (r'<applet[\s>]', "<applet>"),
        (r'<object[^>]+type=["\']application/x-shockwave-flash["\']', "<object type=flash>"),
        (r'<embed[^>]+type=["\']application/x-shockwave-flash["\']', "<embed type=flash>"),
    )
    deprecated = [label for pattern, label in detectors
                  if re.search(pattern, html, re.I)]

    if not deprecated:
        return

    listing = ", ".join(deprecated)
    result.add(Finding("Deprecated HTML",
        f"Deprecated HTML elements: {listing}", Severity.LOW,
        "Page uses deprecated HTML elements that may pose security risks.",
        "Remove deprecated elements. Replace frames with modern alternatives.",
        evidence=listing,
        confidence=Confidence.CONFIRMED))


def check_third_party_scripts(session: requests.Session, url: str, result: ScanResult):
    """Summarise the external hosts that the page loads scripts from.

    Always records an INFO finding with the external host count when there is
    at least one. It adds a LOW finding when more than 10 hosts are involved,
    and a further INFO finding listing hosts that are not in the built-in set
    of well-known CDN/analytics hosts.
    """
    try:
        resp = session.get(url, timeout=15)
    except requests.RequestException:
        return

    html = resp.text
    own_domain = urllib.parse.urlparse(url).hostname or ""

    known_domains = {
        "cdn.jsdelivr.net", "cdnjs.cloudflare.com", "unpkg.com",
        "ajax.googleapis.com", "fonts.googleapis.com", "fonts.gstatic.com",
        "www.google-analytics.com", "www.googletagmanager.com",
        "connect.facebook.net", "platform.twitter.com",
        "cdn.jquery.com", "code.jquery.com", "stackpath.bootstrapcdn.com",
        "maxcdn.bootstrapcdn.com", "use.fontawesome.com",
    }

    # Only the distinct hosts matter for the report below.
    external_hosts = set()
    for tag in re.finditer(r'<script[^>]+src=["\']([^"\']+)["\']', html, re.I):
        host = urllib.parse.urlparse(tag.group(1)).hostname
        if host and host != own_domain:
            external_hosts.add(host)

    if not external_hosts:
        return

    count = len(external_hosts)
    shown_hosts = ", ".join(sorted(external_hosts)[:15])
    unfamiliar = sorted(external_hosts - known_domains)

    result.add(Finding("Third-Party Scripts",
        f"{count} third-party script domains", Severity.INFO,
        f"Page loads scripts from {count} external domains.",
        evidence=shown_hosts,
        confidence=Confidence.CONFIRMED))

    if count > 10:
        result.add(Finding("Third-Party Scripts",
            f"Excessive third-party scripts ({count} domains)", Severity.LOW,
            f"Page loads scripts from {count} external domains, increasing attack surface.",
            "Minimize third-party dependencies. Self-host critical scripts.",
            evidence=shown_hosts,
            confidence=Confidence.CONFIRMED))

    if unfamiliar:
        result.add(Finding("Third-Party Scripts",
            f"{len(unfamiliar)} non-standard script domains", Severity.INFO,
            "Scripts loaded from non-standard/unknown domains.",
            evidence=", ".join(unfamiliar[:10]),
            confidence=Confidence.CONFIRMED))


def check_domain_expiry(session: requests.Session, url: str, result: ScanResult):
    """Check domain expiration via RDAP (rdap.org).

    For hosts with more than two labels the registrable domain is unknown
    without a public-suffix list, so candidates are tried from the shortest
    suffix upwards (``co.uk`` then ``example.co.uk``, at most three attempts)
    until RDAP answers with 200. Hosts with one or two labels are queried
    as-is.

    Findings (all CONFIRMED):
        * MEDIUM when the domain has expired or expires in under 30 days;
        * LOW when it expires in under 90 days;
        * INFO otherwise.

    Nothing is reported when RDAP is unreachable, returns no usable JSON, or
    has no parseable ``expiration`` event.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    if not host:
        return

    labels = host.split(".")
    if len(labels) > 2:
        # Shortest suffix first; capped to keep the number of requests small.
        longest = min(len(labels), 4)
        candidates = [".".join(labels[-n:]) for n in range(2, longest + 1)]
    else:
        candidates = [host]

    try:
        data = None
        domain = candidates[0]
        for candidate in candidates:
            resp = session.get(f"https://rdap.org/domain/{candidate}", timeout=10)
            if resp.status_code == 200:
                domain = candidate
                data = resp.json()
                break
        if not isinstance(data, dict):
            return

        expiry_date = None
        for event in data.get("events", []):
            if not isinstance(event, dict):
                continue
            if event.get("eventAction") == "expiration":
                date_str = event.get("eventDate", "")
                try:
                    expiry_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
                except (ValueError, TypeError, AttributeError):
                    pass
                break

        if not expiry_date:
            return

        # A timestamp without an offset cannot be subtracted from an aware
        # "now"; treat it as UTC.
        if expiry_date.tzinfo is None:
            expiry_date = expiry_date.replace(tzinfo=timezone.utc)

        now = datetime.now(timezone.utc)
        days_left = (expiry_date - now).days
        expires_on = expiry_date.strftime('%Y-%m-%d')

        if days_left < 0:
            result.add(Finding("Domain Expiry",
                f"Domain expired {-days_left} days ago", Severity.MEDIUM,
                f"Domain '{domain}' expired on {expires_on}.",
                "Renew domain registration immediately.",
                evidence=f"Expires: {expires_on}",
                confidence=Confidence.CONFIRMED))
        elif days_left < 30:
            result.add(Finding("Domain Expiry",
                f"Domain expires in {days_left} days", Severity.MEDIUM,
                f"Domain '{domain}' expires on {expires_on}.",
                "Renew domain registration immediately.",
                evidence=f"Expires: {expires_on}",
                confidence=Confidence.CONFIRMED))
        elif days_left < 90:
            result.add(Finding("Domain Expiry",
                f"Domain expires in {days_left} days", Severity.LOW,
                f"Domain '{domain}' expires on {expires_on}.",
                "Renew domain registration soon.",
                evidence=f"Expires: {expires_on}",
                confidence=Confidence.CONFIRMED))
        else:
            result.add(Finding("Domain Expiry",
                f"Domain valid until {expires_on}", Severity.INFO,
                f"Domain '{domain}' is valid for {days_left} more days.",
                evidence=f"Expires: {expires_on}",
                confidence=Confidence.CONFIRMED))
    except (requests.RequestException, ValueError, KeyError):
        # ValueError also covers json.JSONDecodeError from resp.json().
        pass
        pass


# ═══════════════════════════════════════════════════════════════════════════
# NEW CHECKS — Batch 2 (15 additional checks)
# ═══════════════════════════════════════════════════════════════════════════


def check_cors_deep(session: requests.Session, url: str, result: ScanResult):
    """Probe the target for CORS origin-reflection weaknesses.

    Sends one GET per crafted ``Origin`` header and records a finding whenever
    the response echoes that exact value back in
    ``Access-Control-Allow-Origin``. The probes are:
    - the literal ``null`` origin (sandboxed iframe bypass)
    - ``evil.<host>`` (wildcard subdomain reflection)
    - ``<host>.evil.com`` (target host used as a prefix of another domain)
    - ``evil-<host>`` (origin that merely ends with the target host)

    A reflected origin is rated HIGH when the response also carries
    ``Access-Control-Allow-Credentials: true`` and MEDIUM otherwise.
    Probes whose request fails are skipped silently.
    """
    host = urllib.parse.urlparse(url).hostname or ""

    # (Origin header value, short probe label, explanation used in the finding)
    probes = (
        ("null", "null origin", "Sandbox iframe can bypass CORS"),
        (f"https://evil.{host}", "subdomain injection",
         f"evil.{host} reflected — wildcard subdomain CORS"),
        (f"https://{host}.evil.com", "postfix bypass",
         f"{host}.evil.com reflected — regex CORS bypass"),
        (f"https://evil-{host}", "prefix bypass",
         f"evil-{host} reflected — prefix matching bypass"),
    )

    for origin, label, explanation in probes:
        try:
            reply = session.get(url, headers={"Origin": origin}, timeout=8)
        except requests.RequestException:
            continue

        allow_origin = reply.headers.get("Access-Control-Allow-Origin", "")
        allow_creds = reply.headers.get("Access-Control-Allow-Credentials", "").lower()

        # Only an exact echo of the probe value counts as a reflection.
        if allow_origin != origin:
            continue

        if allow_creds == "true":
            severity = Severity.HIGH
            impact = "Credentials allowed — full account takeover possible."
        else:
            severity = Severity.MEDIUM
            impact = "No credentials, but data leakage possible."

        result.add(Finding(
            "CORS Misconfiguration",
            f"CORS {label}: origin reflected",
            severity,
            f"{explanation}. {impact}",
            "Whitelist specific trusted origins. Never reflect arbitrary Origin.",
            evidence=f"Origin: {origin}\nAccess-Control-Allow-Origin: {allow_origin}\nCredentials: {allow_creds}",
            confidence=Confidence.FIRM,
        ))


def check_tls_chain(url: str, result: ScanResult):
    """Check how complete the certificate chain presented by the server is.

    Opens a TLS connection with the default (verifying) SSL context and
    counts the certificates the server sent during the handshake. A server
    that sends only its leaf certificate relies on clients already knowing
    the intermediates, which causes trust failures on some clients
    (especially mobile).

    Findings added to ``result``:
    - HIGH when certificate verification fails outright.
    - HIGH when the only certificate sent is self-signed.
    - MEDIUM when only the leaf certificate is sent.
    - INFO when two or more certificates are sent.
    - INFO "check limited" when the running Python has no public chain API
      (``SSLSocket.get_unverified_chain`` was added in Python 3.13).

    Non-HTTPS URLs, URLs without a host or with a malformed port, and
    connection errors produce no finding.
    """
    parsed = urllib.parse.urlparse(url)
    hostname = parsed.hostname
    if not hostname or parsed.scheme != "https":
        return
    try:
        port = parsed.port or 443
    except ValueError:
        # urlparse validates the port lazily; a malformed one cannot be probed.
        return

    def _name_fields(cert, key):
        # getpeercert() encodes a name as a tuple of RDNs, each RDN being a
        # tuple of (field, value) pairs; keep the first pair of every RDN.
        return dict(rdn[0] for rdn in cert.get(key, ()) if rdn)

    try:
        ctx = ssl.create_default_context()
        with socket.create_connection((hostname, port), timeout=10) as sock:
            with ctx.wrap_socket(sock, server_hostname=hostname) as ssock:
                # Decoded leaf certificate (a dict, because verification is on).
                leaf = ssock.getpeercert() or {}
                # The chain API returns DER-encoded bytes, not dicts, so it is
                # only used for counting. The *unverified* chain is what the
                # peer actually sent; the verified chain also contains the
                # locally stored trust anchor and would hide a missing
                # intermediate.
                fetch_chain = getattr(ssock, "get_unverified_chain", None)
                sent_chain = fetch_chain() if fetch_chain is not None else None
    except ssl.SSLCertVerificationError as e:
        result.add(Finding(
            "TLS Chain",
            "Certificate chain verification failed", Severity.HIGH,
            f"SSL verification error: {str(e)[:200]}.",
            "Fix the certificate chain — ensure all intermediates are included.",
            evidence=str(e)[:300],
            confidence=Confidence.CONFIRMED,
        ))
        return
    except OSError:
        # Covers socket timeouts, refused connections and other TLS errors.
        return

    if sent_chain is None:
        # Python < 3.13: no public way to inspect the chain.
        if leaf:
            result.add(Finding(
                "TLS Chain",
                "Certificate chain check limited", Severity.INFO,
                "Python version does not support full chain inspection.",
                evidence=f"Python {sys.version.split()[0]}",
                confidence=Confidence.CONFIRMED,
            ))
        return

    chain_len = len(sent_chain)
    subject = _name_fields(leaf, "subject")
    issuer = _name_fields(leaf, "issuer")
    leaf_cn = subject.get("commonName", "?")

    if chain_len < 2 and subject and subject == issuer:
        # A lone certificate that names itself as issuer is self-signed.
        result.add(Finding(
            "TLS Chain",
            "Self-signed certificate", Severity.HIGH,
            "Certificate is self-signed — browsers will show security warning.",
            "Use a certificate from a trusted CA (Let's Encrypt, etc.).",
            evidence=f"Subject: {leaf_cn}",
            confidence=Confidence.CONFIRMED,
        ))
    elif chain_len < 2:
        result.add(Finding(
            "TLS Chain",
            "Incomplete certificate chain", Severity.MEDIUM,
            "Server sends only the leaf certificate without intermediate CAs. "
            "Some clients (mobile, older browsers) may fail to verify trust.",
            "Configure the server to send the full certificate chain.",
            evidence=f"Chain length: {chain_len} (expected >= 2)",
            confidence=Confidence.CONFIRMED,
        ))
    else:
        issuer_cn = issuer.get("commonName", "?")
        result.add(Finding(
            "TLS Chain",
            f"Certificate chain complete ({chain_len} certs)", Severity.INFO,
            f"Server sent {chain_len} certificates: leaf '{leaf_cn}' issued by '{issuer_cn}'.",
            evidence=f"Chain depth: {chain_len}",
            confidence=Confidence.CONFIRMED,
        ))


def check_redirect_chain(session: requests.Session, url: str, result: ScanResult):
    """Follow the redirects for ``url`` and report problems in the chain.

    Reports:
    - LOW when more than 3 redirects are needed to reach the destination
    - HIGH for the first hop that goes from an ``https://`` URL to a
      ``Location`` starting with ``http://``
    - MEDIUM when the client gives up with ``TooManyRedirects`` (likely loop)

    Any other request failure is ignored.
    """
    try:
        final = session.get(url, timeout=10, allow_redirects=True)
    except requests.TooManyRedirects:
        result.add(Finding(
            "Redirect Chain",
            "Redirect loop detected", Severity.MEDIUM,
            "Request exceeded maximum redirects — likely a redirect loop.",
            "Fix redirect configuration to avoid loops.",
            confidence=Confidence.FIRM,
        ))
        return
    except requests.RequestException:
        return

    hops = final.history
    hop_count = len(hops)

    if hop_count > 3:
        # Evidence is the sequence of Location targets, each capped at 60 chars.
        targets = [hop.headers.get("Location", "?")[:60] for hop in hops]
        trail = " → ".join(targets)
        result.add(Finding(
            "Redirect Chain",
            f"Excessive redirects ({hop_count} hops)", Severity.LOW,
            f"Request goes through {hop_count} redirects before reaching destination. "
            "This slows page load and may indicate misconfiguration.",
            "Minimize redirect chain to 1-2 hops max.",
            evidence=trail[:500],
            confidence=Confidence.CONFIRMED,
        ))

    # Only the first HTTPS→HTTP downgrade is reported.
    for step, hop in enumerate(hops, start=1):
        target = hop.headers.get("Location", "")
        if not (hop.url.startswith("https://") and target.startswith("http://")):
            continue
        result.add(Finding(
            "Redirect Chain",
            "HTTPS to HTTP downgrade in redirect chain", Severity.HIGH,
            f"Step {step}: {hop.url[:80]} redirects to insecure HTTP ({target[:80]}). "
            "This exposes traffic to interception.",
            "Ensure all redirects stay on HTTPS.",
            evidence=f"Step {step}: {hop.url[:100]} → {target[:100]}",
            confidence=Confidence.CONFIRMED,
        ))
        break


def check_api_versioning(session: requests.Session, url: str, result: ScanResult):
    """Check for several API versions being exposed side by side.

    Probes a fixed list of versioned paths (``api/v1`` … ``v3``, plus docs and
    Swagger files) without following redirects. A path counts as live when it
    answers 200/301/302, is not classified as an SPA response by
    ``_is_spa_response`` and is not a plain 200 HTML page.

    Live endpoints are grouped by their ``vN`` path segment, so ``/api/v1``
    and ``/api/v1/docs`` count as one version rather than two:
    - two or more distinct versions → LOW finding
    - exactly one version           → INFO finding
    - nothing live                  → no finding

    Failed requests are skipped.
    """
    base = url.rstrip("/")
    candidate_paths = (
        "api/v1", "api/v2", "api/v3",
        "v1", "v2", "v3",
        "api/v1/docs", "api/v2/docs",
        "api/v1/swagger.json",
        "api/v2/swagger.json",
    )
    version_segment = re.compile(r"(?:^|/)(v\d+)(?:/|$)")

    # (path, HTTP status, truncated Content-Type) for every live endpoint
    live_endpoints = []
    for path in candidate_paths:
        try:
            resp = session.get(f"{base}/{path}", timeout=8, allow_redirects=False)
        except requests.RequestException:
            continue
        if resp.status_code not in (200, 301, 302) or _is_spa_response(resp, result):
            continue
        ct = resp.headers.get("Content-Type", "")
        # Skip if it's an HTML page (likely SPA or generic landing)
        if "text/html" in ct and resp.status_code == 200:
            continue
        live_endpoints.append((path, resp.status_code, ct[:50]))

    if not live_endpoints:
        return

    # Distinct version labels, in the order they were first seen.
    versions = []
    for path, _status, _ct in live_endpoints:
        hit = version_segment.search(path)
        label = hit.group(1) if hit else path
        if label not in versions:
            versions.append(label)

    evidence = "\n".join(f"/{p} — HTTP {s}, {c}" for p, s, c in live_endpoints)

    if len(versions) >= 2:
        paths = ", ".join(f"/{p} ({s})" for p, s, _c in live_endpoints[:5])
        result.add(Finding(
            "API Versioning",
            f"{len(versions)} API versions accessible", Severity.LOW,
            f"Multiple API versions are publicly accessible: {paths}. "
            "Old versions may lack security patches or expose deprecated endpoints.",
            "Deprecate old API versions. Restrict access or remove them.",
            evidence=evidence,
            confidence=Confidence.FIRM,
        ))
    else:
        first_path, first_status, _ct = live_endpoints[0]
        result.add(Finding(
            "API Versioning",
            f"API endpoint found: /{first_path}", Severity.INFO,
            f"API version /{first_path} is accessible (HTTP {first_status}).",
            evidence=evidence,
            confidence=Confidence.CONFIRMED,
        ))


def check_admin_panels(session: requests.Session, url: str, result: ScanResult):
    """Check for exposed admin panels and management interfaces.

    Requests a fixed list of common admin paths (generic, CMS, framework,
    database, monitoring and dev-tool locations) concurrently, without
    following redirects.

    A path is reported when it either:
    - answers 200 with HTML/JSON that is not an SPA response and does not
      look like the site's soft-404 page (MEDIUM, "accessible"), or
    - answers 301/302 to a ``Location`` containing login/auth/signin/sso
      (LOW, "redirects to login").

    At most five findings are emitted. Results are evaluated in the order of
    the path list and directly accessible panels are listed first, so the
    output is stable between runs and the cap never hides a MEDIUM finding
    behind LOW ones.
    """
    import hashlib as _hl

    base = url.rstrip("/")
    admin_paths = [
        # Generic
        "admin", "admin/", "administrator", "admin/login", "admin.php",
        "adminpanel", "admin/dashboard", "admin/index", "controlpanel",
        "management", "manage", "manager", "panel", "panel/",
        # CMS-specific
        "wp-admin/", "wp-login.php",
        "administrator/index.php",  # Joomla
        "user/login",  # Drupal
        "admin/config/",  # Drupal
        "ghost/",  # Ghost CMS
        "modx/",  # MODX
        "bitrix/admin/",  # 1C-Bitrix
        "typo3/",  # TYPO3
        "umbraco/",  # Umbraco
        "sitecore/login/",  # Sitecore
        "sitefinity",  # Sitefinity
        "kentico/",  # Kentico
        # Frameworks
        "admin/login.html", "admin/login.php", "admin/login.asp",
        "backend/", "dashboard/", "dashboard/login",
        "cpanel/", "webmail/", "plesk/",
        # Database tools
        "phpmyadmin/", "pma/", "adminer.php", "adminer/",
        "pgadmin/", "dbadmin/",
        # Monitoring
        "grafana/", "kibana/", "nagios/",
        "prometheus/", "traefik/", "portainer/",
        # Dev tools
        "_profiler/", "_debugbar/",  # Symfony, Laravel
        "telescope/", "horizon/",  # Laravel
        "elmah.axd", "trace.axd",  # ASP.NET
        "console/", "terminal/",
    ]

    # Soft-404 baseline: fetch a path that should not exist. If the site
    # answers 200 anyway, remember that page's hash and size so look-alike
    # responses can be discarded below.
    soft404_hash = None
    soft404_size = 0
    try:
        r404 = session.get(f"{base}/qxz_admin_panel_8471_nonexist/", timeout=8, allow_redirects=False)
    except requests.RequestException:
        r404 = None
    if r404 is not None and r404.status_code == 200:
        soft404_hash = _hl.sha256(r404.content).hexdigest()
        soft404_size = len(r404.content)

    def check_one(path):
        # Returns the response, or None when the request failed.
        try:
            return session.get(f"{base}/{path}", timeout=8, allow_redirects=False)
        except requests.RequestException:
            return None

    # pool.map yields results in submission order, which keeps the findings
    # deterministic regardless of which request finishes first.
    with ThreadPoolExecutor(max_workers=10) as pool:
        responses = list(pool.map(check_one, admin_paths))

    # (path, status, note, directly_accessible)
    found_panels = []
    for path, resp in zip(admin_paths, responses):
        if resp is None:
            continue
        status = resp.status_code

        if status == 200:
            if _is_spa_response(resp, result):
                continue
            size = len(resp.content)
            if soft404_hash and _hl.sha256(resp.content).hexdigest() == soft404_hash:
                continue
            # Within 5% of the soft-404 page size: treat as the same page.
            if soft404_size > 500 and abs(size - soft404_size) / max(soft404_size, 1) < 0.05:
                continue
            ct = resp.headers.get("Content-Type", "")
            if "text/html" not in ct and "application/json" not in ct:
                continue
            found_panels.append((path, status, "accessible", True))
        elif status in (301, 302):
            location = resp.headers.get("Location", "")
            # Admin panel that redirects to login = it exists
            if any(kw in location.lower() for kw in ("login", "auth", "signin", "sso")):
                found_panels.append((path, status, f"→ login ({location[:60]})", False))

    # Stable sort: accessible panels first, original path order otherwise.
    found_panels.sort(key=lambda panel: not panel[3])

    for path, status, note, accessible in found_panels[:5]:
        # Severity comes from the explicit flag, never from the note text:
        # the note embeds the server-supplied Location header.
        sev = Severity.MEDIUM if accessible else Severity.LOW
        state = "publicly accessible" if accessible else "present (redirects to login)"
        result.add(Finding(
            "Admin Panel",
            f"Admin panel found: /{path}", sev,
            f"Admin interface at /{path} is {state}. "
            f"HTTP {status}, {note}.",
            "Restrict admin panel access by IP or VPN. Add 2FA.",
            evidence=f"/{path} — HTTP {status} — {note}",
            confidence=Confidence.FIRM,
        ))


def check_cookie_consent(session: requests.Session, url: str, result: ScanResult):
    """Check if tracking cookies are set before any user consent.

    Issues one GET from a brand-new session (no prior cookies, same
    User-Agent as ``session``) and inspects every cookie received, both on
    the final response and in the session jar (which also holds cookies set
    by intermediate redirects).

    A cookie counts as tracking when its lower-cased name
    - starts with a known analytics/marketing prefix,
    - equals a known exact name (Facebook's ``fr``), or
    - contains a generic tracking keyword.

    Adds a single LOW finding listing the tracking cookies, if any.
    Request failures are ignored.
    """
    tracking_prefixes = tuple(prefix.lower() for prefix in (
        "_ga", "_gid", "_gat", "_gtag",  # Google Analytics
        "_fbp", "_fbc",  # Facebook
        "_ym_uid", "_ym_d", "_ym_isad",  # Yandex Metrica
        "__utma", "__utmb", "__utmc", "__utmz",  # Classic GA
        "_hjid", "_hjSession",  # Hotjar
        "mp_", "mixpanel",  # Mixpanel
        "ajs_",  # Segment
        "_clck", "_clsk",  # Clarity
        "hubspotutk",  # HubSpot
        "intercom-",  # Intercom
    ))
    # Matched exactly: as a prefix, "fr" would also hit unrelated cookies
    # such as "frontend_lang".
    tracking_exact = {"fr"}
    tracking_keywords = (
        "analytics", "tracking", "pixel", "adroll",
        "doubleclick", "adsense", "_gcl_", "criteo",
        "optimizely", "amplitude",
    )

    try:
        # Fresh session — no cookies. The context manager closes it even
        # when the request raises.
        with requests.Session() as fresh:
            fresh.headers["User-Agent"] = session.headers.get("User-Agent", "Mozilla/5.0")
            resp = fresh.get(url, timeout=10, allow_redirects=True)
            received = list(resp.cookies) + list(fresh.cookies)
    except requests.RequestException:
        return

    tracking_cookies = []
    seen = set()
    for cookie in received:
        # The response jar and the session jar hold separate Cookie objects
        # for the same cookie, so de-duplicate on identifying fields rather
        # than on object equality.
        key = (cookie.name, cookie.domain, cookie.path)
        if key in seen:
            continue
        seen.add(key)

        name_lower = cookie.name.lower()
        is_tracking = (
            name_lower in tracking_exact
            or name_lower.startswith(tracking_prefixes)
            or any(kw in name_lower for kw in tracking_keywords)
        )
        if is_tracking:
            tracking_cookies.append(f"{cookie.name} ({cookie.domain})")

    if tracking_cookies:
        result.add(Finding(
            "Cookie Consent",
            f"{len(tracking_cookies)} tracking cookie(s) set without consent",
            Severity.LOW,
            f"Tracking cookies set before user consent: {', '.join(tracking_cookies[:5])}. "
            "This may violate the GDPR and Article 5(3) of the ePrivacy Directive.",
            "Implement a cookie consent banner. Only set tracking cookies after user approval.",
            evidence="\n".join(tracking_cookies[:10]),
            confidence=Confidence.FIRM,
        ))


def check_dnssec(url: str, result: ScanResult):
    """Check if the DNS zone serving the target host publishes DNSSEC keys.

    DNSSEC protects against DNS spoofing and cache poisoning by signing DNS
    records. DNSKEY records are published at the zone apex, not at individual
    host names, so the enclosing zone of the URL's host is located first
    (``www.example.com`` → ``example.com``) and queried for DNSKEY.

    Adds an INFO finding when DNSKEY records exist and a LOW finding when the
    zone has none. Nothing is reported when dnspython is unavailable, the URL
    has no host, the enclosing zone is the root or a top-level zone (IP
    literals, single-label hosts), or the DNS lookups fail.
    """
    if not HAS_DNS:
        return

    domain = urllib.parse.urlparse(url).hostname
    if not domain:
        return

    try:
        zone = dns.resolver.zone_for_name(domain)
        # An absolute name ends with the empty root label, so fewer than
        # three labels means the root zone or a bare TLD — not a zone the
        # site owner controls.
        if len(zone.labels) < 3:
            return

        # Check for DNSKEY record (indicates DNSSEC is configured)
        try:
            answers = dns.resolver.resolve(zone, "DNSKEY")
        except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN):
            result.add(Finding(
                "DNSSEC",
                "DNSSEC not configured", Severity.LOW,
                "No DNSKEY records found. Domain is vulnerable to DNS spoofing and cache poisoning.",
                "Enable DNSSEC at your domain registrar.",
                confidence=Confidence.FIRM,
            ))
            return

        key_count = len(list(answers))
        result.add(Finding(
            "DNSSEC",
            f"DNSSEC enabled ({key_count} key(s))", Severity.INFO,
            f"Domain has {key_count} DNSKEY record(s). DNS responses are cryptographically signed.",
            evidence=f"DNSKEY records: {key_count}",
            confidence=Confidence.CONFIRMED,
        ))
    except (dns.exception.DNSException, OSError, ValueError):
        # Best-effort check: timeouts, unreachable name servers or a host
        # string that is not a valid DNS name simply yield no finding.
        return


def check_sri_verify(session: requests.Session, url: str, result: ScanResult):
    """Verify that existing SRI hashes match the resources they protect.

    If a tag has integrity="sha384-XXXX" but the actual hash doesn't match,
    the browser will block the resource — this could break the site.

    The page is scanned for ``<script>``/``<link>`` tags carrying both a
    ``src``/``href`` and an ``integrity`` attribute, in any order. Each
    cross-origin resource is downloaded and hashed with every supported
    algorithm listed (sha256, sha384, sha512); it passes when any listed
    digest matches. Same-origin resources, path-relative URLs, resources that
    do not return HTTP 200 and integrity values without a supported algorithm
    are skipped.

    Adds one MEDIUM finding naming the mismatching resources, if any.
    """
    import hashlib
    import base64

    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    body = resp.text
    parsed = urllib.parse.urlparse(url)

    # Match the tag first and pull the attributes out separately so that
    # attribute order does not matter. The leading \s keeps "data-src" and
    # similar attributes from being mistaken for src/href.
    tag_pattern = re.compile(r'<(?:script|link)\s[^>]*>', re.I)
    location_pattern = re.compile(r'\s(?:src|href)\s*=\s*["\']([^"\']+)["\']', re.I)
    integrity_pattern = re.compile(r'\sintegrity\s*=\s*["\']([^"\']+)["\']', re.I)
    supported_algos = ("sha256", "sha384", "sha512")

    mismatches = []

    for tag in tag_pattern.finditer(body):
        tag_text = tag.group(0)
        location = location_pattern.search(tag_text)
        integrity = integrity_pattern.search(tag_text)
        if not location or not integrity:
            continue
        resource_url = location.group(1)
        integrity_val = integrity.group(1).strip()

        # Resolve relative URLs
        if resource_url.startswith("//"):
            resource_url = parsed.scheme + ":" + resource_url
        elif resource_url.startswith("/"):
            resource_url = f"{parsed.scheme}://{parsed.netloc}{resource_url}"
        elif not resource_url.startswith("http"):
            continue

        # Skip same-origin resources
        if urllib.parse.urlparse(resource_url).netloc == parsed.netloc:
            continue

        # SRI can have multiple hashes separated by space; each may carry
        # "?option" suffixes that are not part of the digest.
        expected = []
        for entry in integrity_val.split():
            algo, sep, digest = entry.partition("-")
            algo = algo.lower()
            if sep and algo in supported_algos:
                expected.append((algo, digest.split("?", 1)[0]))
        if not expected:
            # Nothing this check can verify; browsers ignore integrity
            # metadata they do not recognise rather than blocking.
            continue

        # Verify hash (download the resource)
        try:
            res_resp = session.get(resource_url, timeout=10)
        except requests.RequestException:
            continue
        if res_resp.status_code != 200:
            continue

        content = res_resp.content
        valid = any(
            base64.b64encode(hashlib.new(algo, content).digest()).decode() == digest
            for algo, digest in expected
        )
        if not valid:
            mismatches.append(resource_url.split("/")[-1][:50])

    if mismatches:
        result.add(Finding(
            "SRI Verification",
            f"{len(mismatches)} SRI hash mismatch(es)", Severity.MEDIUM,
            f"Integrity hashes don't match actual content for: {', '.join(mismatches[:3])}. "
            "Browser will block these resources, potentially breaking the site.",
            "Regenerate SRI hashes. Use: "
            "shasum -b -a 384 file.js | awk '{ print $1 }' | xxd -r -p | base64",
            evidence="\n".join(mismatches[:5]),
            confidence=Confidence.FIRM,
        ))


def check_js_framework_cve(session: requests.Session, url: str, result: ScanResult):
    """Detect client-side JavaScript library versions and known CVEs.

    Searches the page source for ``<name><sep><x.y.z>`` version markers of
    jQuery, AngularJS/Angular, Vue, React, Lodash, Bootstrap and Moment.js
    (the last marker found per library wins) and compares each version
    against a small built-in advisory table using ``_version_lt``.

    For every detected library exactly one finding is added: the first
    matching advisory (the table lists the most severe first), or an INFO
    finding when no advisory applies. Advisory findings are emitted before
    the INFO ones. A failed page request yields no findings.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    body = resp.text

    def last_version(pattern, flags=re.I):
        # findall with a single group returns the captured version strings.
        hits = re.findall(pattern, body, flags)
        return hits[-1] if hits else None

    # The (?<![A-Za-z]) guard stops a library name from matching inside a
    # longer word, e.g. "preact-10.5.0" being reported as React 10.5.0.
    candidates = (
        ("jQuery",
         last_version(r'(?<![A-Za-z])jquery[./\s-]v?(\d+\.\d+\.\d+)')
         or last_version(r'(?<![A-Za-z])jQuery\s+v?(\d+\.\d+\.\d+)', 0)),
        ("AngularJS", last_version(r'(?<![A-Za-z])angular[./\s-]v?(\d+\.\d+\.\d+)')),
        ("Angular", last_version(r'@angular/core@(\d+\.\d+\.\d+)', 0)),
        ("Vue.js", last_version(r'(?<![A-Za-z])vue[./\s-]v?(\d+\.\d+\.\d+)')),
        ("React", last_version(r'(?<![A-Za-z])react(?:\.production)?[./\s-]v?(\d+\.\d+\.\d+)')),
        ("Lodash", last_version(r'(?<![A-Za-z])lodash[./\s-]v?(\d+\.\d+\.\d+)')),
        ("Bootstrap", last_version(r'(?<![A-Za-z])bootstrap[./\s-]v?(\d+\.\d+\.\d+)')),
        ("Moment.js", last_version(r'(?<![A-Za-z])moment[./\s-]v?(\d+\.\d+\.\d+)')),
    )
    detected = {lib: version for lib, version in candidates if version}

    # Known advisories for client-side libraries:
    # (library, lowest affected version or None, first fixed version,
    #  advisory id, description, severity name).
    # An entry applies when lowest <= detected < first fixed.
    JS_CVE_DB = [
        ("jQuery", None, "3.5.0", "CVE-2020-11022", "XSS in jQuery.htmlPrefilter", "HIGH"),
        ("jQuery", None, "3.0.0", "CVE-2019-11358", "Prototype pollution via extend()", "MEDIUM"),
        ("jQuery", None, "1.12.0", "CVE-2015-9251", "XSS via cross-domain ajax", "MEDIUM"),
        ("jQuery", None, "2.2.0", "CVE-2015-9251", "XSS via cross-domain ajax", "MEDIUM"),
        ("AngularJS", None, "1.6.9", "CVE-2022-25869", "XSS via $sanitize", "MEDIUM"),
        ("AngularJS", None, "1.8.0", "Multiple", "AngularJS 1.x is EOL since Dec 2021", "MEDIUM"),
        ("Lodash", None, "4.17.21", "CVE-2021-23337", "Command injection via template()", "HIGH"),
        ("Lodash", None, "4.17.15", "CVE-2020-8203", "Prototype pollution", "HIGH"),
        ("Lodash", None, "4.17.11", "CVE-2019-10744", "Prototype pollution via defaultsDeep", "HIGH"),
        # The 4.x entry is bounded below so that patched 3.x releases
        # (3.4.1 and later) are not flagged merely for being "< 4.3.1".
        ("Bootstrap", "4.0.0", "4.3.1", "CVE-2019-8331", "XSS via tooltip/popover", "MEDIUM"),
        ("Bootstrap", None, "3.4.1", "CVE-2019-8331", "XSS via tooltip/popover", "MEDIUM"),
        ("Moment.js", None, "99.99.99", "Deprecated", "Moment.js is in maintenance mode (deprecated)", "LOW"),
        ("Vue.js", None, "2.7.0", "EOL", "Vue 2.x reached EOL Dec 2023", "LOW"),
    ]
    sev_map = {"HIGH": Severity.HIGH, "MEDIUM": Severity.MEDIUM, "LOW": Severity.LOW}

    def first_advisory(lib, version):
        # First table entry whose affected range contains `version`, or None.
        for cve_lib, min_ver, max_ver, cve_id, cve_desc, cve_sev in JS_CVE_DB:
            if cve_lib != lib or not _version_lt(version, max_ver):
                continue
            if min_ver is not None and _version_lt(version, min_ver):
                continue
            return max_ver, cve_id, cve_desc, cve_sev
        return None

    unaffected = []
    for lib, version in detected.items():
        advisory = first_advisory(lib, version)
        if advisory is None:
            unaffected.append((lib, version))
            continue
        max_ver, cve_id, cve_desc, cve_sev = advisory
        result.add(Finding(
            "JS Framework CVE",
            f"{lib} {version}: {cve_id}", sev_map.get(cve_sev, Severity.MEDIUM),
            f"{lib} {version} is affected by {cve_id}: {cve_desc}.",
            f"Update {lib} to the latest version.",
            evidence=f"Detected: {lib} {version}, Vulnerable below: {max_ver}",
            confidence=Confidence.FIRM,
        ))

    # Report detected libraries without CVEs as INFO
    for lib, version in unaffected:
        result.add(Finding(
            "JS Framework",
            f"Detected: {lib} {version}", Severity.INFO,
            f"Client-side library {lib} version {version}.",
            evidence=f"{lib} {version}",
            confidence=Confidence.CONFIRMED,
        ))


def check_env_leak(session: requests.Session, url: str, result: ScanResult):
    """Check for environment variable leaks in page source and .env files.

    Two independent probes:

    1. The page at ``url`` is scanned for ``KEY=value`` / ``KEY: value``
       patterns of well-known secret names (database, secret keys, mail,
       cache, Stripe, Sentry, Twilio). Matches containing placeholder words
       are ignored and bodies shorter than 100 characters are not scanned.
       All distinct variable names are counted; the first five are listed in
       one CRITICAL finding. Only the variable name is reported, never the
       matched value.
    2. ``/.env`` and common variants are requested. A 200 response that has
       ``KEY=VALUE`` lines and mentions a secret-related keyword in its first
       500 characters produces one CRITICAL finding; probing stops at the
       first hit.

    If the initial page request fails, nothing is checked.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    body = resp.text

    env_patterns = [
        (r'(?:DB_(?:HOST|PASSWORD|USER|NAME|PORT)|DATABASE_URL)\s*[:=]\s*["\']?[^"\'\s,;]{3,}',
         "Database credentials"),
        (r'(?:SECRET_KEY|APP_SECRET|JWT_SECRET|ENCRYPTION_KEY)\s*[:=]\s*["\']?[^"\'\s,;]{8,}',
         "Secret key"),
        (r'(?:SMTP_PASSWORD|MAIL_PASSWORD|EMAIL_PASSWORD)\s*[:=]\s*["\']?[^"\'\s,;]{3,}',
         "Email credentials"),
        (r'(?:REDIS_URL|REDIS_PASSWORD|CACHE_URL)\s*[:=]\s*["\']?[^"\'\s,;]{3,}',
         "Cache credentials"),
        (r'(?:STRIPE_SECRET_KEY|PAYMENT_SECRET)\s*[:=]\s*["\']?sk_(?:live|test)_[A-Za-z0-9]{10,}',
         "Payment credentials"),
        (r'(?:SENTRY_DSN)\s*[:=]\s*["\']?https://[a-f0-9]+@[^"\'\s]+',
         "Sentry DSN"),
        (r'(?:TWILIO_AUTH_TOKEN|TWILIO_SID)\s*[:=]\s*["\']?[a-f0-9]{20,}',
         "Twilio credentials"),
    ]
    placeholders = ("example", "xxx", "your_", "placeholder",
                    "change_me", "todo", "fixme", "replace",
                    "insert_", "enter_", "put_your")

    # Variable name → category. A tiny body only disables this in-page scan;
    # the .env probes below do not depend on the page content.
    leaks = {}
    if len(body) >= 100:
        for pattern, desc in env_patterns:
            for m in re.finditer(pattern, body, re.I):
                matched = m.group(0)
                # Skip if it looks like documentation or placeholder
                lower = matched.lower()
                if any(ph in lower for ph in placeholders):
                    continue
                # Keep only the variable name so the secret is not repeated
                # in the report.
                key_part = matched.split("=")[0].split(":")[0].strip()
                leaks[key_part] = desc

    if leaks:
        unique = list(leaks.items())
        shown = unique[:5]
        result.add(Finding(
            "Environment Leak",
            f"{len(unique)} environment variable(s) exposed", Severity.CRITICAL,
            f"Server-side environment variables found in page source: "
            f"{', '.join(f'{k[0]} ({k[1]})' for k in shown)}. "
            "These should never be visible in client-side code.",
            "Move secrets to server-side environment. Never inject env vars into HTML/JS templates.",
            evidence="\n".join(f"{k[0]} — {k[1]}" for k in shown),
            confidence=Confidence.FIRM,
        ))

    # Check for exposed .env files
    base = url.rstrip("/")
    env_paths = ["/.env", "/.env.local", "/.env.production", "/.env.backup"]
    secret_keywords = ("password", "secret", "key", "token",
                       "database", "db_", "api_", "smtp",
                       "redis", "mongo", "aws_")
    for path in env_paths:
        try:
            resp2 = session.get(f"{base}{path}", timeout=8)
        except requests.RequestException:
            continue
        if resp2.status_code != 200 or len(resp2.text) <= 10:
            continue
        text = resp2.text[:500].lower()
        # Must look like a real .env file (KEY=VALUE lines)
        if not re.search(r'^[A-Z_]+=.+', resp2.text, re.MULTILINE):
            continue
        if not any(kw in text for kw in secret_keywords):
            continue
        result.add(Finding(
            "Environment Leak",
            f"Exposed {path} file", Severity.CRITICAL,
            f"The file {path} is publicly accessible and contains "
            "what appears to be environment configuration with secrets.",
            f"Block access to {path} in your web server configuration. "
            "Add .env* to .gitignore and remove from public directories.",
            confidence=Confidence.CONFIRMED,
        ))
        break  # One .env finding is enough


def check_email_spoofing(session: requests.Session, url: str, result: ScanResult):
    """Check if the domain is vulnerable to email spoofing.

    Looks up the SPF record (TXT at the domain) and the DMARC record (TXT at
    ``_dmarc.<domain>``) and combines them:
    - SPF ``+all`` (or a bare ``all``)        → HIGH
    - no DMARC record and SPF is not ``-all`` → MEDIUM
    - DMARC ``p=none``                        → MEDIUM
    - DMARC quarantine/reject + SPF ~all/-all → INFO (protected)

    ``session`` is not used by this check. Nothing is reported when
    dnspython is unavailable, the URL has no host, or a DNS lookup fails for
    a reason other than "no such record".

    NOTE: the mail domain is taken to be the last two labels of the host,
    which is wrong for multi-label public suffixes such as ``co.uk``.
    """
    if not HAS_DNS:
        return

    domain = urllib.parse.urlparse(url).hostname
    if not domain:
        return

    # Strip subdomains for email domain
    parts = domain.split(".")
    if len(parts) > 2:
        domain = ".".join(parts[-2:])

    def txt_records(name):
        # TXT strings published at `name`; empty when there are none. A long
        # record is split into several quoted chunks that str() renders as
        # '"a" "b"', so the chunks are joined back together.
        try:
            answers = dns.resolver.resolve(name, "TXT")
        except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
            return []
        return [str(rdata).strip('"').replace('" "', "") for rdata in answers]

    try:
        spf_candidates = txt_records(domain)
        dmarc_candidates = txt_records(f"_dmarc.{domain}")
    except (dns.exception.DNSException, OSError):
        # Timeouts and similar failures: no basis for a verdict.
        return

    # Check SPF. "missing" also covers TXT data that holds no SPF record.
    spf_policy = "missing"
    spf_by_qualifier = {"+": "permissive", "": "permissive", "~": "softfail",
                        "-": "strict", "?": "neutral"}
    for txt in spf_candidates:
        if not txt.startswith("v=spf1"):
            continue
        # Match the "all" mechanism as a whole term so that names such as
        # "include:mail-all.example.com" are not mistaken for it. A bare
        # "all" has the default "+" qualifier.
        term = re.search(r'(?:^|\s)([+~?-]?)all(?=\s|$)', txt, re.I)
        spf_policy = spf_by_qualifier[term.group(1)] if term else "present"
        break

    # Check DMARC. Only the "p" tag is read; "sp=" (subdomain policy) must
    # not be confused with it.
    dmarc_policy = "missing"
    for txt in dmarc_candidates:
        if "v=DMARC1" not in txt:
            continue
        tag = re.search(r'(?:^|;)\s*p\s*=\s*([A-Za-z]+)', txt)
        value = tag.group(1).lower() if tag else ""
        dmarc_policy = value if value in ("reject", "quarantine", "none") else "present"
        break

    # Evaluate spoofing risk
    evidence = f"SPF: {spf_policy}, DMARC: {dmarc_policy}"

    if spf_policy == "permissive":
        result.add(Finding(
            "Email Spoofing",
            "SPF allows any sender (+all)", Severity.HIGH,
            f"SPF record uses +all — anyone can send email as @{domain}.",
            "Change SPF to use -all (hard fail) and configure DMARC with p=reject.",
            evidence=evidence,
            confidence=Confidence.CONFIRMED,
        ))
    elif dmarc_policy == "missing" and spf_policy != "strict":
        result.add(Finding(
            "Email Spoofing",
            "No DMARC + weak SPF: email spoofing possible", Severity.MEDIUM,
            f"Domain {domain} has no DMARC record and SPF is '{spf_policy}'. "
            "Attackers can send emails pretending to be from this domain.",
            f"Add DMARC record: _dmarc.{domain} TXT \"v=DMARC1; p=reject; rua=mailto:dmarc@{domain}\"",
            evidence=evidence,
            confidence=Confidence.FIRM,
        ))
    elif dmarc_policy == "none":
        result.add(Finding(
            "Email Spoofing",
            "DMARC p=none: email spoofing possible", Severity.MEDIUM,
            f"DMARC is set to p=none for {domain} — spoofed emails are delivered. "
            "This is a monitoring-only mode, not enforcement.",
            "Change DMARC policy to p=quarantine or p=reject after reviewing reports.",
            evidence=evidence,
            confidence=Confidence.FIRM,
        ))
    elif dmarc_policy in ("reject", "quarantine") and spf_policy in ("strict", "softfail"):
        result.add(Finding(
            "Email Spoofing",
            f"Email spoofing protected (DMARC={dmarc_policy})", Severity.INFO,
            f"Domain {domain} has SPF ({spf_policy}) + DMARC (p={dmarc_policy}).",
            evidence=evidence,
            confidence=Confidence.CONFIRMED,
        ))


def check_csp_report(session: requests.Session, url: str, result: ScanResult):
    """Detailed Content-Security-Policy analysis.

    Fetches ``url``, splits the ``Content-Security-Policy`` response header
    into directives and, when at least one weakness is found, adds a single
    INFO finding ("CSP Report") that lists them:

    * dangerous sources ('unsafe-inline', 'unsafe-eval', data:, blob:) in
      ``script-src`` / ``default-src``;
    * a bare ``*`` source in any directive, or a ``*.host`` source in
      ``script-src`` / ``default-src``;
    * missing script-src/default-src, object-src, base-uri, form-action and
      frame-ancestors directives.

    Args:
        session: HTTP session used to fetch the page.
        url: Page whose CSP header is analysed.
        result: Scan result that receives the finding.

    Returns:
        None. Nothing is reported when the request fails, the header is
        absent, or no issue is detected.
    """
    try:
        resp = session.get(url, timeout=10)
    except requests.RequestException:
        return

    csp = resp.headers.get("Content-Security-Policy", "")
    if not csp:
        return  # Already flagged by check_headers

    directives = {}
    for part in csp.split(";"):
        tokens = part.split()
        if not tokens:
            continue
        # Directive names are case-insensitive, and when a directive is
        # repeated only its first occurrence is honoured — later duplicates
        # must not overwrite the one that is actually enforced.
        directives.setdefault(tokens[0].lower(), tokens[1:])

    issues = []

    # Check for dangerous source values
    dangerous_sources = {
        "'unsafe-inline'": "Allows inline <script>/<style> — XSS vector",
        "'unsafe-eval'": "Allows eval()/Function() — code injection vector",
        "data:": "Allows data: URIs — can bypass CSP for script/style injection",
        "blob:": "Allows blob: URIs — can execute arbitrary code",
    }

    for directive in ("script-src", "default-src"):
        # Keyword and scheme sources match case-insensitively, so compare
        # against a lower-cased copy of the source list.
        lowered = {value.lower() for value in directives.get(directive, [])}
        for danger, desc in dangerous_sources.items():
            if danger in lowered:
                issues.append(f"{directive}: {danger} — {desc}")

    # Check for wildcard or overly broad sources
    for directive, values in directives.items():
        if "*" in values:
            issues.append(f"{directive}: * (wildcard) — allows loading from any source")
        if directive in ("script-src", "default-src"):
            for v in values:
                if v.startswith("*."):
                    issues.append(f"{directive}: {v} — wildcard subdomain, attackable via subdomain takeover")

    # Check for missing critical directives
    critical_missing = []
    if "default-src" not in directives and "script-src" not in directives:
        critical_missing.append("script-src (no restriction on script sources)")
    if "object-src" not in directives and "default-src" not in directives:
        critical_missing.append("object-src (Flash/Java plugins not blocked)")
    if "base-uri" not in directives:
        critical_missing.append("base-uri (allows <base> tag hijacking)")
    if "form-action" not in directives:
        critical_missing.append("form-action (forms can submit to any destination)")
    if "frame-ancestors" not in directives:
        critical_missing.append("frame-ancestors (clickjacking not prevented by CSP)")

    if critical_missing:
        issues.append(f"Missing directives: {'; '.join(critical_missing)}")

    # Missing report-uri/report-to is deliberately not reported here; the
    # original code notes it is already covered by check_reporting_headers.

    if issues:
        # The title carries the full count; the description lists at most 10.
        result.add(Finding("CSP Report",
            f"CSP analysis: {len(issues)} issue(s)", Severity.INFO,
            "Detailed CSP breakdown:\n" + "\n".join(f"• {i}" for i in issues[:10]),
            "Review each CSP directive. Use https://csp-evaluator.withgoogle.com/ for detailed guidance.",
            evidence=f"CSP: {csp[:300]}{'...' if len(csp) > 300 else ''}",
            confidence=Confidence.CONFIRMED))


def check_dns_propagation(url: str, result: ScanResult):
    """Check DNS consistency across nameservers.

    Finds the NS records for the host (or its closest parent that has them),
    asks each of those nameservers (at most six) directly for the host's
    A records and compares the answers. Inconsistency can indicate
    DNS hijacking, misconfiguration, or propagation issues.

    Findings added to ``result`` (category "DNS Propagation"):
        LOW    — fewer than two nameservers are published.
        INFO   — all responding nameservers return the same A records.
        MEDIUM — responding nameservers return different A records.

    Args:
        url: URL of the scanned site; only its hostname is used.
        result: Scan result that receives the finding.

    Returns:
        None. Nothing is reported when dnspython is not installed, the URL has
        no hostname, no NS records can be found, or fewer than two
        nameservers answer the A query.
    """
    if not HAS_DNS:
        return

    domain = urllib.parse.urlparse(url).hostname
    if not domain:
        return

    no_data_errors = (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN,
                      dns.resolver.NoNameservers)

    # Get NS records. A host such as www.example.co.uk usually has no NS
    # records of its own, so walk up one label at a time until a name with NS
    # records is found. Stopping at two labels keeps bare TLDs out of the
    # search; jumping straight to the last two labels would query "co.uk"
    # instead of "example.co.uk" and would skip delegated sub-zones.
    labels = domain.split(".")
    nameservers = None
    for start in range(max(len(labels) - 1, 1)):
        zone = ".".join(labels[start:])
        try:
            ns_answers = dns.resolver.resolve(zone, "NS")
        except no_data_errors:
            continue  # Try parent domain
        except Exception:
            return
        nameservers = [str(ns).rstrip(".") for ns in ns_answers]
        break

    if nameservers is None:
        return

    if len(nameservers) < 2:
        result.add(Finding("DNS Propagation",
            "Only 1 nameserver", Severity.LOW,
            f"Domain has only {len(nameservers)} nameserver. "
            "If it goes down, the domain becomes unreachable.",
            "Add at least 2 nameservers for redundancy.",
            evidence=f"NS: {', '.join(nameservers)}",
            confidence=Confidence.CONFIRMED))
        return

    # Query each NS for A records
    ns_results = {}
    for ns in nameservers[:6]:  # Limit to 6 NS
        try:
            ns_ip = str(dns.resolver.resolve(ns, "A")[0])
            # configure=False: do not load the system resolver settings, so
            # the query goes only to this nameserver's address.
            resolver = dns.resolver.Resolver(configure=False)
            resolver.nameservers = [ns_ip]
            resolver.timeout = 5
            resolver.lifetime = 5
            answers = resolver.resolve(domain, "A")
            ns_results[ns] = sorted(str(a) for a in answers)
        except Exception:
            ns_results[ns] = ["ERROR"]

    # Check consistency (nameservers that failed to answer are ignored)
    valid_results = {ns: ips for ns, ips in ns_results.items() if ips != ["ERROR"]}
    if len(valid_results) < 2:
        return

    ip_sets = list(valid_results.values())
    all_same = all(ips == ip_sets[0] for ips in ip_sets)

    if all_same:
        result.add(Finding("DNS Propagation",
            f"DNS consistent across {len(valid_results)} NS", Severity.INFO,
            "All nameservers return the same A records.",
            evidence=f"NS: {', '.join(valid_results.keys())}\nA: {', '.join(ip_sets[0])}",
            confidence=Confidence.CONFIRMED))
    else:
        details = [f"{ns}: {', '.join(ips)}" for ns, ips in valid_results.items()]
        result.add(Finding("DNS Propagation",
            "DNS inconsistency detected", Severity.MEDIUM,
            "Nameservers return different A records. This can indicate "
            "DNS hijacking, ongoing migration, or propagation delay.",
            "Ensure all nameservers have identical zone records.",
            evidence="\n".join(details),
            confidence=Confidence.FIRM))


def check_bimi(url: str, result: ScanResult):
    """Check for BIMI (Brand Indicators for Message Identification).

    BIMI allows brands to display their logo next to emails in
    supporting clients (Gmail, Apple Mail). Requires DMARC with
    p=quarantine or p=reject.

    Looks up the TXT record at ``default._bimi.<domain>`` and adds one INFO
    finding (category "BIMI"): either "BIMI configured" (noting whether the
    ``l=`` logo and ``a=`` certificate tags carry a value) or "No BIMI record".

    Args:
        url: URL of the scanned site; only its hostname is used.
        result: Scan result that receives the finding.

    Returns:
        None. Nothing is reported when dnspython is not installed, the URL has
        no hostname, or the lookup fails for a reason other than "no such
        record".
    """
    if not HAS_DNS:
        return

    domain = urllib.parse.urlparse(url).hostname
    if not domain:
        return

    # Strip subdomains
    parts = domain.split(".")
    if len(parts) > 2:
        domain = ".".join(parts[-2:])

    try:
        answers = dns.resolver.resolve(f"default._bimi.{domain}", "TXT")
    except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
        answers = ()
    except Exception:
        return

    for rdata in answers:
        txt = str(rdata).strip('"')
        if "v=BIMI1" not in txt:
            continue

        # Parse "tag=value" pairs separated by ";". A tag counts as present
        # only when it has a non-empty value, so "l=" / "a=" with nothing
        # after them (with or without a trailing ";") is reported as absent.
        tags = {}
        for piece in txt.split(";"):
            key, sep, value = piece.partition("=")
            if sep:
                tags[key.strip().lower()] = value.strip()
        has_logo = bool(tags.get("l"))
        has_vmc = bool(tags.get("a"))

        result.add(Finding("BIMI",
            "BIMI configured" + (" with logo" if has_logo else ""),
            Severity.INFO,
            f"BIMI record found for {domain}. "
            f"{'Logo URL present.' if has_logo else 'No logo URL.'} "
            f"{'VMC certificate present.' if has_vmc else 'No VMC certificate.'}",
            evidence=txt[:200],
            confidence=Confidence.CONFIRMED))
        return

    # Reached when the name does not exist, has no TXT data, or none of its
    # TXT strings is a BIMI record.
    result.add(Finding("BIMI",
        "No BIMI record", Severity.INFO,
        f"No BIMI record found for {domain}. Email clients won't display brand logo.",
        f'Configure BIMI: add default._bimi.{domain} TXT "v=BIMI1; l=https://example.com/logo.svg"',
        confidence=Confidence.CONFIRMED))


def scan_site(url: str, full: bool = False, progress_callback=None) -> ScanResult:
    """Run all checks for a single site.

    The checks run in three stages: phase 1 sequentially on a shared session,
    phase 2 in a pool of 10 worker threads (each check gets its own session,
    closed when the check finishes), and phase 3 sequentially on the shared
    session again. A check that raises is recorded in ``result.errors`` and
    does not stop the scan.

    Args:
        url: Target URL; ``https://`` is prepended when no scheme is given.
        full: When true, the extended set of checks (ports, DNS, subdomains,
            email, ...) is added to phase 2.
        progress_callback: Optional callable invoked after every check as
            ``progress_callback(name, done, total, status)`` where ``status``
            is ``"ok"`` or ``"error"``.

    Returns:
        The populated ScanResult, including ``timestamp`` and ``duration``.
    """
    import hashlib

    # A fresh module-level rate limiter is installed for every scan.
    global _rate_limiter
    _rate_limiter = _RateLimiter(max_concurrent=5, min_delay=0.08)

    if not url.startswith(("http://", "https://")):
        url = "https://" + url

    result = ScanResult(url=url)
    result.timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    start = time.time()

    session = make_session()

    # SPA detection: fingerprint the homepage
    try:
        home_resp = session.get(url, timeout=10)
        result.spa_hash = hashlib.sha256(home_resp.content).hexdigest()
        result.spa_size = len(home_resp.content)
    except requests.RequestException:
        pass  # best effort: the fingerprint simply stays unset

    # Phase 1: sequential checks that other checks depend on
    phase1 = [
        ("SSL/TLS", lambda s: check_ssl(url, result)),
        ("HTTP Headers", lambda s: check_headers(s, url, result)),
        ("Reporting Headers", lambda s: check_reporting_headers(s, url, result)),
        ("HTTP->HTTPS Redirect", lambda s: check_http_redirect(s, url, result)),
    ]

    # Phase 2: parallel checks (independent, each gets its own session)
    phase2 = [
        ("Clickjacking", lambda s: check_clickjacking(s, url, result)),
        ("CORS", lambda s: check_cors(s, url, result)),
        ("HTTP Methods", lambda s: check_methods(s, url, result)),
        ("Sensitive Files", lambda s: check_sensitive_paths(s, url, result)),
        ("Backup Files", lambda s: check_backup_files(s, url, result)),
        ("Git Exposure", lambda s: check_git_exposure(s, url, result)),
        ("Source Code Leak", lambda s: check_source_leak(s, url, result)),
        ("Debug Endpoints", lambda s: check_debug_endpoints(s, url, result)),
        ("Directory Listing", lambda s: check_directory_listing(s, url, result)),
        ("robots.txt Analysis", lambda s: check_robots_analysis(s, url, result)),
        ("Info Disclosure", lambda s: check_info_disclosure(s, url, result)),
        ("Error Handling", lambda s: check_error_handling(s, url, result)),
        ("Content-Type", lambda s: check_content_type(s, url, result)),
        ("Cookie Scope", lambda s: check_cookie_scope(s, url, result)),
        ("XSS / SQLi / SSTI", lambda s: check_injection_points(s, url, result)),
        ("DOM XSS", lambda s: check_dom_xss(s, url, result)),
        ("Command Injection", lambda s: check_command_injection(s, url, result)),
        ("NoSQL Injection", lambda s: check_nosql_injection(s, url, result)),
        ("LFI / RFI", lambda s: check_lfi_rfi(s, url, result)),
        ("CRLF Injection", lambda s: check_crlf(s, url, result)),
        ("Host Header Injection", lambda s: check_host_header(s, url, result)),
        ("Open Redirect", lambda s: check_open_redirect(s, url, result)),
        ("XXE", lambda s: check_xxe(s, url, result)),
        ("SSRF Params", lambda s: check_ssrf_params(s, url, result)),
        ("Prototype Pollution", lambda s: check_prototype_pollution(s, url, result)),
        ("HTTP Parameter Pollution", lambda s: check_hpp(s, url, result)),
        ("Request Smuggling", lambda s: check_request_smuggling(s, url, result)),
        ("JSONP", lambda s: check_jsonp(s, url, result)),
        ("HTML Analysis", lambda s: check_html_content(s, url, result)),
        ("Secrets in JS", lambda s: check_js_secrets(s, url, result)),
        ("JWT Analysis", lambda s: check_jwt(s, url, result)),
        ("Deserialization", lambda s: check_deserialization(s, url, result)),
        ("Cache Poisoning", lambda s: check_cache_poisoning(s, url, result)),
        ("IP Spoofing", lambda s: check_ip_spoofing(s, url, result)),
        ("CSP Bypass", lambda s: check_csp_bypass(s, url, result)),
        ("WAF Detection", lambda s: check_waf(s, url, result)),
        ("CMS Detection", lambda s: check_cms(s, url, result)),
        ("GraphQL", lambda s: check_graphql(s, url, result)),
        ("OpenAPI/Swagger", lambda s: check_openapi(s, url, result)),
        ("WSDL", lambda s: check_wsdl(s, url, result)),
        ("WebSocket", lambda s: check_websocket(s, url, result)),
        ("Unauthenticated API", lambda s: check_api_auth(s, url, result)),
        ("User Enumeration", lambda s: check_user_enumeration(s, url, result)),
        ("Timing Enumeration", lambda s: check_timing_enum(s, url, result)),
        ("403 Bypass", lambda s: check_403_bypass(s, url, result)),
        ("Verb Tampering", lambda s: check_verb_tampering(s, url, result)),
        ("Session Fixation", lambda s: check_session_fixation(s, url, result)),
        ("IDOR", lambda s: check_idor(s, url, result)),
        ("Mass Assignment", lambda s: check_mass_assignment(s, url, result)),
        ("Cloud Storage", lambda s: check_cloud_storage(s, url, result)),
        ("BREACH", lambda s: check_breach(s, url, result)),
        ("SNI Mismatch", lambda s: check_sni(url, result)),
        ("security.txt", lambda s: check_security_txt(s, url, result)),
        ("Privacy Policy", lambda s: check_privacy_policy(s, url, result)),
        ("Response Time", lambda s: check_response_time(s, url, result)),
        ("Favicon Fingerprint", lambda s: check_favicon_fingerprint(s, url, result)),
        ("DNS CAA", lambda s: check_dns_caa(url, result)),
        ("HTTP/3", lambda s: check_http3(s, url, result)),
        ("HSTS Preload", lambda s: check_hsts_preload(s, url, result)),
        ("Subresource Integrity", lambda s: check_sri(s, url, result)),
        ("Meta Refresh", lambda s: check_meta_refresh(s, url, result)),
        ("Sitemap", lambda s: check_sitemap(s, url, result)),
        ("Deprecated HTML", lambda s: check_deprecated_html(s, url, result)),
        ("Third-Party Scripts", lambda s: check_third_party_scripts(s, url, result)),
        ("Domain Expiry", lambda s: check_domain_expiry(s, url, result)),
        ("Cookie Flags", lambda s: check_cookie_flags(s, url, result)),
        ("Mixed Content", lambda s: check_mixed_content(s, url, result)),
        ("Dangling Markup", lambda s: check_dangling_markup(s, url, result)),
        ("Redirect Chain", lambda s: check_redirect_chain(s, url, result)),
        ("Env Leak", lambda s: check_env_leak(s, url, result)),
        ("CSP Report", lambda s: check_csp_report(s, url, result)),
        ("JS Framework CVE", lambda s: check_js_framework_cve(s, url, result)),
        ("Cookie Consent", lambda s: check_cookie_consent(s, url, result)),
    ]

    # Phase 3: checks that depend on phase 1/2 findings
    phase3 = [
        ("Technologies", lambda s: check_technology(s, url, result)),
        ("Known CVEs", lambda s: check_cve(s, url, result)),
    ]

    if full:
        phase2.extend([
            ("Ports", lambda s: check_open_ports(url, result)),
            ("Banner Grabbing", lambda s: check_banner_grab(url, result)),
            ("DNS/Email", lambda s: check_dns(url, result)),
            ("Email Security", lambda s: check_email_security(s, url, result)),
            ("Subdomains", lambda s: check_subdomains(url, result)),
            ("Subdomain Takeover", lambda s: check_subdomain_takeover(s, url, result)),
            ("WP Plugins", lambda s: check_wp_plugins(s, url, result)),
            ("Password Policy", lambda s: check_password_policy(s, url, result)),
            ("Rate Limiting", lambda s: check_rate_limiting(s, url, result)),
            ("HTTP/2 & Compression", lambda s: check_http2(s, url, result)),
            ("Certificate Transparency", lambda s: check_certificate_transparency(s, url, result)),
            ("CORS Preflight", lambda s: check_cors_preflight(s, url, result)),
            ("Bucket Bruteforce", lambda s: check_bucket_bruteforce(s, url, result)),
            ("Deprecated TLS", lambda s: check_deprecated_tls(url, result)),
            ("CORS Deep", lambda s: check_cors_deep(s, url, result)),
            ("TLS Chain", lambda s: check_tls_chain(url, result)),
            ("API Versioning", lambda s: check_api_versioning(s, url, result)),
            ("Admin Panels", lambda s: check_admin_panels(s, url, result)),
            ("DNSSEC", lambda s: check_dnssec(url, result)),
            ("SRI Verify", lambda s: check_sri_verify(s, url, result)),
            ("Email Spoofing", lambda s: check_email_spoofing(s, url, result)),
            ("DNS Propagation", lambda s: check_dns_propagation(url, result)),
            ("BIMI", lambda s: check_bimi(url, result)),
        ])

    total = len(phase1) + len(phase2) + len(phase3)
    done_count = [0]  # one-element list so the nested function can mutate it
    print_lock = threading.Lock()

    def run_check(name, check_fn, use_own_session=False):
        """Run one check, then report its outcome under the print lock."""
        own_session = None
        error = None
        try:
            # Creating the session inside the try means a failure here is
            # reported like any other check error instead of vanishing inside
            # a worker future.
            if use_own_session:
                own_session = make_session()
            check_fn(own_session if own_session is not None else session)
        except Exception as e:
            # Some exceptions stringify to ""; fall back to the class name so
            # the failure is still shown and recorded.
            error = str(e) or type(e).__name__
        finally:
            if own_session is not None:
                own_session.close()  # release this check's pooled connections
        status = "error" if error is not None else "ok"

        with print_lock:
            done_count[0] += 1
            pct = int(done_count[0] / total * 100)
            if error is not None:
                print(f"  {RED}[{pct:3d}%]{RESET} {name}: {error}")
                result.errors.append(f"{name}: {error}")
            else:
                print(f"  {GREEN}[{pct:3d}%]{RESET} {name}")
            if progress_callback:
                progress_callback(name, done_count[0], total, status)

    try:
        # Phase 1: sequential
        for name, check_fn in phase1:
            run_check(name, check_fn)

        # Phase 2: parallel (10 workers, each with own session)
        with ThreadPoolExecutor(max_workers=10) as pool:
            futures = [pool.submit(run_check, name, check_fn, True)
                       for name, check_fn in phase2]
            for _ in as_completed(futures):
                pass  # results handled in run_check

        # Phase 3: sequential (depends on earlier findings)
        for name, check_fn in phase3:
            run_check(name, check_fn)
    finally:
        session.close()

    result.duration = time.time() - start
    return result




def _calc_score(summary: dict, findings: list = None) -> tuple:
    """Return ``(score, grade)`` for a scan.

    The score starts at 100 and is reduced per finding: CRITICAL 25, HIGH 10,
    MEDIUM 3, LOW 1 (the combined LOW deduction is capped at 5), INFO 0.
    It never drops below 0.

    Args:
        summary: Mapping of severity name ("CRITICAL", "HIGH", ...) to count.
            Used only when ``findings`` is None (flat scoring).
        findings: Optional list of findings. When given, each deduction is
            multiplied by ``finding.confidence.weight`` and the result is
            rounded to an integer.

    Returns:
        Tuple of the numeric score and a letter grade:
        A (>= 85), B (>= 70), C (>= 50), D (>= 35), otherwise F.
    """
    if findings is None:
        # Flat scoring (backward compat for summary-only callers)
        score = 100
        for severity_name, cost in (("CRITICAL", 25), ("HIGH", 10), ("MEDIUM", 3)):
            score -= summary.get(severity_name, 0) * cost
        score -= min(summary.get("LOW", 0), 5)
        score = max(0, score)
    else:
        # Confidence-weighted scoring
        base_cost = {
            Severity.CRITICAL: 25,
            Severity.HIGH: 10,
            Severity.MEDIUM: 3,
            Severity.LOW: 1,
            Severity.INFO: 0,
        }
        deducted = 0.0
        deducted_low = 0.0
        for finding in findings:
            cost = base_cost.get(finding.severity, 0)
            if cost == 0:
                continue
            cost_weighted = cost * finding.confidence.weight
            if finding.severity == Severity.LOW:
                deducted_low += cost_weighted
            else:
                deducted += cost_weighted
        # LOW findings together can cost at most 5 points.
        deducted += min(deducted_low, 5)
        score = max(0, round(100 - deducted))

    for floor, letter in ((85, "A"), (70, "B"), (50, "C"), (35, "D")):
        if score >= floor:
            return score, letter
    return score, "F"


def generate_summary(result: ScanResult) -> str:
    """Build the plain-text conclusion for a scan.

    The text is a newline-joined sequence of: the score line, an optional
    "Strengths" line (well-known categories that have only INFO findings),
    critical/high issues grouped by category, a per-category count of medium
    issues, the number of low findings, up to three distinct recommendations
    (taken from CRITICAL, then HIGH, then MEDIUM findings) and a fixed note on
    what the scanner does not test.

    Args:
        result: Finished scan whose ``summary`` and ``findings`` are read.

    Returns:
        The summary as a single string.
    """
    counts = result.summary
    score, grade = _calc_score(counts, result.findings)
    n_critical = counts.get("CRITICAL", 0)
    n_high = counts.get("HIGH", 0)
    n_medium = counts.get("MEDIUM", 0)
    n_low = counts.get("LOW", 0)

    # --- Overall Score ---
    verdicts = (
        (85, "Excellent security posture."),
        (70, "Good security posture with minor issues."),
        (50, "Average security, notable issues found."),
        (35, "Poor security, attention required."),
    )
    overall = next(
        (text for floor, text in verdicts if score >= floor),
        "Critical security level, urgent action required.",
    )
    out = [f"Score: {grade} ({score}/100). {overall}"]

    # --- Group non-INFO findings by category, then by severity ---
    grouped = defaultdict(lambda: defaultdict(list))
    seen_categories = set()
    for finding in result.findings:
        seen_categories.add(finding.category)
        if finding.severity != Severity.INFO:
            grouped[finding.category][finding.severity].append(finding)

    # --- Strengths (categories with no issues) ---
    spotless = seen_categories - set(grouped)
    # Show only key clean categories
    highlighted = [
        name
        for name in ("SSL/TLS", "HTTP Headers", "Cookies", "CORS",
                     "Redirects", "CSP Analysis")
        if name in spotless
    ]
    if highlighted:
        out.append(f"Strengths: {', '.join(highlighted)} — no issues.")

    ordered_categories = sorted(grouped)

    # --- Critical and high-severity issues ---
    if n_critical or n_high:
        urgent = []
        for category in ordered_categories:
            per_severity = grouped[category]
            titles = [
                item.title
                for level in (Severity.CRITICAL, Severity.HIGH)
                for item in per_severity.get(level, [])
            ]
            if not titles:
                continue
            if len(titles) > 2:
                urgent.append(f"{category}: {titles[0]} and {len(titles) - 1} more")
            else:
                urgent.append(f"{category}: {'; '.join(titles)}")
        out.append("Critical/high issues: " + ". ".join(urgent) + ".")

    # --- Medium issues (brief) ---
    if n_medium:
        medium_parts = []
        for category in ordered_categories:
            medium_items = grouped[category].get(Severity.MEDIUM, [])
            if medium_items:
                medium_parts.append(f"{category} ({len(medium_items)})")
        out.append(f"Medium issues ({n_medium}): {', '.join(medium_parts)}.")

    # --- Low severity ---
    if n_low:
        out.append(f"Low severity findings: {n_low}.")

    # --- Top recommendations (up to 3) ---
    advice = []
    for level in (Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM):
        if len(advice) >= 3:
            break
        for finding in result.findings:
            if finding.severity != level:
                continue
            if not finding.recommendation or finding.recommendation in advice:
                continue
            advice.append(finding.recommendation)
            if len(advice) >= 3:
                break

    if advice:
        numbered = " ".join(f"{pos}) {text}" for pos, text in enumerate(advice, 1))
        out.append("Recommendations: " + numbered)

    # --- Not covered (active attacks) ---
    out.append("Not tested (requires separate audit): SSH/FTP/MySQL brute-force, "
               "web form password guessing, credential strength testing. "
               "Use Nmap scripts (ssh-brute, ftp-brute, http-form-brute) "
               "or Hydra for these checks.")

    return "\n".join(out)


def print_report(result: ScanResult):
    """Write the human-readable scan report to stdout.

    Prints a header, then the findings grouped by category (categories whose
    worst finding is most severe come first; categories with only INFO
    findings are reduced to a one-line count), followed by the security score,
    per-severity totals, the text summary and up to ten scan errors.

    Args:
        result: Finished scan to print.
    """
    heavy_rule = "=" * 74
    light_rule = "-" * 74

    print()
    print(f"{BOLD}{heavy_rule}{RESET}")
    print(f"{BOLD}  REPORT: {result.url}{RESET}")
    print(f"  Time: {result.timestamp}  |  Duration: {result.duration:.1f}s")
    print(f"  Found: {len(result.findings)} findings")
    print(heavy_rule)

    grouped = defaultdict(list)
    for finding in result.findings:
        grouped[finding.category].append(finding)

    # Lower rank = more severe; unknown severities sort last.
    rank = {Severity.CRITICAL: 0, Severity.HIGH: 1, Severity.MEDIUM: 2,
            Severity.LOW: 3, Severity.INFO: 4}
    confidence_colors = {
        Confidence.CONFIRMED: GREEN,
        Confidence.FIRM: "\033[33m",
        Confidence.TENTATIVE: GRAY,
    }

    def severity_rank(item):
        return rank.get(item.severity, 99)

    # Critical categories first
    ordered = sorted(
        grouped.items(),
        key=lambda pair: min(severity_rank(item) for item in pair[1]),
    )

    for category, members in ordered:
        actionable = [item for item in members if item.severity != Severity.INFO]
        info_total = len(members) - len(actionable)
        heading = f"\n  {BOLD}> {category}{RESET}"

        if info_total and not actionable:
            # Skip INFO-only categories: show just the count.
            print(f"{heading}  {GRAY}({info_total} info){RESET}")
            continue
        if info_total:
            print(f"{heading}  {GRAY}(+{info_total} info){RESET}")
        else:
            print(heading)

        for item in sorted(actionable, key=severity_rank):
            level = item.severity
            if level in (Severity.HIGH, Severity.CRITICAL):
                marker = "[!]"
            else:
                marker = "[*]"
            tag_color = confidence_colors.get(item.confidence, GRAY)
            confidence_tag = f" {tag_color}({item.confidence.value}){RESET}"
            print(f"    {level.color}{marker} [{level.value:8s}]{RESET} {item.title}{confidence_tag}")
            print(f"      {GRAY}{item.description}{RESET}")
            if item.evidence:
                # At most three evidence lines, each cut to 120 characters.
                for evidence_line in item.evidence.split("\n")[:3]:
                    print(f"      {GRAY}  | {evidence_line[:120]}{RESET}")
            if item.recommendation:
                print(f"      {GREEN}-> {item.recommendation}{RESET}")

    counts = result.summary
    print(f"\n{light_rule}")

    score, letter = _calc_score(counts, result.findings)

    if score >= 70:
        grade_color = GREEN
    elif score >= 50:
        grade_color = YELLOW
    else:
        grade_color = RED
    grade = f"{grade_color}{letter}{RESET}"

    print(f"  Security score: {grade} ({score}/100)")
    totals = [
        f"{level.color}{level.value}: {counts.get(level.value, 0)}{RESET}"
        for level in Severity
        if counts.get(level.value, 0)
    ]
    print(f"  Total: {' | '.join(totals)}")

    conclusion = generate_summary(result)
    print(f"\n{light_rule}")
    print(f"  {BOLD}Summary:{RESET}")
    for text_line in conclusion.split("\n"):
        print(f"    {text_line}")

    if result.errors:
        print(f"\n  {RED}Scan errors ({len(result.errors)}):{RESET}")
        for message in result.errors[:10]:
            print(f"    - {message}")

    print(f"{heavy_rule}\n")


def save_json(results: list, filename: str):
    """Write scan results to ``filename`` as a JSON array.

    Each element holds the URL, timestamp, duration (rounded to 2 decimals),
    score, severity summary, text summary, the findings (category, title,
    severity, confidence, description, recommendation) and scan errors.

    Args:
        results: Scan results to serialise, written in the given order.
        filename: Destination path; the file is written as UTF-8.
    """

    def finding_record(item):
        return {
            "category": item.category,
            "title": item.title,
            "severity": item.severity.value,
            "confidence": item.confidence.value,
            "description": item.description,
            "recommendation": item.recommendation,
        }

    def scan_record(scan):
        score, _grade = _calc_score(scan.summary, scan.findings)
        return {
            "url": scan.url,
            "timestamp": scan.timestamp,
            "duration": round(scan.duration, 2),
            "score": score,
            "summary": scan.summary,
            "text_summary": generate_summary(scan),
            "findings": [finding_record(item) for item in scan.findings],
            "errors": scan.errors,
        }

    payload = [scan_record(scan) for scan in results]

    with open(filename, "w", encoding="utf-8") as out_file:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(payload, out_file, ensure_ascii=False, indent=2)
    print(f"{GREEN}Report saved: {filename}{RESET}")


def save_html(results: list, filename: str):
    """Save report as HTML.

    Writes one self-contained page with a section per scanned site: URL,
    timestamp/duration, score, severity badges, the text summary and every
    non-INFO finding grouped by category (most severe first within a
    category).

    Text that originates from the scan (URL, category names, titles,
    descriptions, recommendations, summary lines) is HTML-escaped before it is
    embedded, so markup echoed back by a scanned site cannot inject tags or
    scripts into the report.

    Args:
        results: Scan results to render, in the given order.
        filename: Destination path; the file is written as UTF-8.
    """
    # Imported locally: this function is the only user.
    from html import escape

    sev_colors = {
        "CRITICAL": "#dc3545", "HIGH": "#fd7e14",
        "MEDIUM": "#ffc107", "LOW": "#17a2b8", "INFO": "#6c757d"
    }
    # Most severe first, matching the console report and the dict export.
    # (Sorting by enum definition order would put LOW before CRITICAL.)
    sev_rank = {Severity.CRITICAL: 0, Severity.HIGH: 1, Severity.MEDIUM: 2,
                Severity.LOW: 3, Severity.INFO: 4}

    chunks = ["""<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Site Scanner Report</title>
<style>
  body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
         max-width: 960px; margin: 0 auto; padding: 20px; background: #0d1117; color: #c9d1d9; }
  h1 { color: #58a6ff; border-bottom: 1px solid #30363d; padding-bottom: 10px; }
  h2 { color: #c9d1d9; margin-top: 30px; }
  h3 { color: #8b949e; }
  .site { background: #161b22; border: 1px solid #30363d; border-radius: 8px;
          padding: 20px; margin: 20px 0; }
  .finding { padding: 8px 12px; margin: 4px 0; border-radius: 4px;
             border-left: 4px solid; background: #1c2128; }
  .badge { display: inline-block; padding: 2px 8px; border-radius: 12px;
           font-size: 12px; font-weight: 600; color: #fff; }
  .conf-badge { display: inline-block; padding: 1px 6px; border-radius: 8px;
                font-size: 10px; font-weight: 500; margin-left: 4px; border: 1px solid; }
  .conf-confirmed { color: #3fb950; border-color: #23603a; }
  .conf-firm { color: #d29922; border-color: #6e5600; }
  .conf-tentative { color: #8b949e; border-color: #484f58; }
  .score { font-size: 48px; font-weight: bold; text-align: center; }
  .rec { color: #3fb950; font-size: 13px; margin-top: 4px; }
  .meta { color: #8b949e; font-size: 14px; }
  .cat { margin: 15px 0; }
  table { width: 100%; border-collapse: collapse; }
  td { padding: 8px; border-bottom: 1px solid #30363d; }
</style>
</head>
<body>
<h1>Site Scanner Report</h1>
"""]

    for r in results:
        summary = r.summary
        score, _ = _calc_score(summary, r.findings)
        score_color = "#3fb950" if score >= 70 else "#ffc107" if score >= 50 else "#dc3545"

        chunks.append('<div class="site">')
        chunks.append(f'<h2>{escape(str(r.url))}</h2>')
        chunks.append(f'<p class="meta">{r.timestamp} | {r.duration:.1f}s | {len(r.findings)} findings</p>')
        chunks.append(f'<div class="score" style="color:{score_color}">{score}/100</div>')

        chunks.append('<p style="text-align:center">')
        for sev_name in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]:
            cnt = summary.get(sev_name, 0)
            if cnt:
                chunks.append(f'<span class="badge" style="background:{sev_colors[sev_name]}">'
                              f'{sev_name}: {cnt}</span> ')
        chunks.append('</p>')

        text_summary = generate_summary(r)
        chunks.append('<div style="background:#1c2128;border:1px solid #30363d;border-radius:6px;'
                      'padding:16px;margin:16px 0">')
        chunks.append('<h3 style="margin-top:0;color:#58a6ff">Conclusion</h3>')
        for line in text_summary.split("\n"):
            chunks.append(f'<p style="margin:4px 0;line-height:1.5">{escape(line)}</p>')
        chunks.append('</div>')

        by_cat = defaultdict(list)
        for f in r.findings:
            by_cat[f.category].append(f)

        for cat, findings in by_cat.items():
            non_info = [f for f in findings if f.severity.value != "INFO"]
            if not non_info:
                continue
            chunks.append(f'<div class="cat"><h3>{escape(str(cat))}</h3>')
            for f in sorted(non_info, key=lambda x: sev_rank.get(x.severity, 99)):
                color = sev_colors[f.severity.value]
                conf_cls = f"conf-{f.confidence.value.lower()}"
                conf_label = f.confidence.value
                chunks.append(f'<div class="finding" style="border-left-color:{color}">')
                chunks.append(f'<span class="badge" style="background:{color}">{f.severity.value}</span>'
                              f'<span class="conf-badge {conf_cls}">{conf_label}</span> '
                              f'<strong>{escape(str(f.title))}</strong>')
                chunks.append(f'<br><small>{escape(str(f.description))}</small>')
                if f.recommendation:
                    chunks.append(f'<div class="rec">-> {escape(str(f.recommendation))}</div>')
                chunks.append('</div>')
            chunks.append('</div>')

        chunks.append('</div>')

    chunks.append('<p class="meta" style="text-align:center">Generated by Site Scanner v3.0</p>')
    chunks.append('</body></html>')

    with open(filename, "w", encoding="utf-8") as f:
        f.write("\n".join(chunks))
    print(f"{GREEN}HTML report saved: {filename}{RESET}")




def _result_to_dict(r: ScanResult) -> dict:
    """Build the plain-dict form of a scan result (used for JSON output and the agent).

    The returned mapping carries the target URL, timestamp, duration rounded
    to two decimals, the numeric score from ``_calc_score``, the severity
    summary, the generated text summary, the findings ordered from CRITICAL
    down to INFO, and the list of errors. A finding only gets an
    ``"evidence"`` key when its evidence is truthy.
    """
    # Lower rank sorts first; unknown severity values go last (rank 5).
    severity_rank = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}

    def serialize(finding):
        entry = {
            "category": finding.category,
            "title": finding.title,
            "severity": finding.severity.value,
            "confidence": finding.confidence.value,
            "description": finding.description,
            "recommendation": finding.recommendation,
        }
        if finding.evidence:
            entry["evidence"] = finding.evidence
        return entry

    # Only the numeric score is exported; the second element is not used here.
    score, _unused = _calc_score(r.summary, r.findings)

    payload = {
        "url": r.url,
        "timestamp": r.timestamp,
        "duration": round(r.duration, 2),
        "score": score,
        "summary": r.summary,
        "text_summary": generate_summary(r),
    }
    ordered = sorted(
        r.findings,
        key=lambda item: severity_rank.get(item.severity.value, 5),
    )
    payload["findings"] = [serialize(item) for item in ordered]
    payload["errors"] = r.errors
    return payload




class _AgentReporter:
    """Report progress and results to scan.qpus.su."""

    def __init__(self, server: str, scan_id: str, token: str):
        import requests as _req
        self._req = _req
        self.server = server.rstrip("/")
        self.scan_id = scan_id
        self.token = token
        self.headers = {"Authorization": f"Bearer {token}"}

    def progress(self, check_name: str, index: int, total: int, status: str):
        pct = int(index / total * 100) if total > 0 else 0
        try:
            self._req.post(
                f"{self.server}/api/scan/{self.scan_id}/progress",
                json={"check_name": check_name, "index": index, "total": total, "pct": pct, "status": status},
                headers=self.headers, timeout=5,
            )
        except Exception:
            pass

    def result(self, data: dict):
        try:
            resp = self._req.post(
                f"{self.server}/api/scan/{self.scan_id}/result",
                json=data, headers=self.headers, timeout=30,
            )
            if resp.status_code == 200:
                print(f"\n  Result sent to server.")
                print(f"  View: {self.server}?id={self.scan_id}")
            else:
                print(f"\n  Send error: {resp.status_code} {resp.text[:200]}")
        except Exception as e:
            print(f"\n  Failed to send result: {e}")

    def error(self, msg: str):
        try:
            self._req.post(
                f"{self.server}/api/scan/{self.scan_id}/error",
                json={"error": msg}, headers=self.headers, timeout=5,
            )
        except Exception:
            pass


def _run_agent(args):
    """Agent mode: scan the first target and upload the outcome to the server.

    Progress is streamed through the reporter while the scan runs. On
    success the result is uploaded and the score and duration are printed.
    On Ctrl-C or any other exception the failure is printed, reported to
    the server, and the process exits with status 1.
    """
    reporter = _AgentReporter(args.server, args.scan_id, args.token)

    def abort(console_msg, server_msg):
        # Shared failure path: tell the user, tell the server, then exit.
        print(console_msg)
        reporter.error(server_msg)
        sys.exit(1)

    print("  Site Scanner Agent")
    target = args.targets[0]
    print(f"  Target: {target}")
    print(f"  Server: {args.server}")
    mode = "full" if args.full else "standard"
    print(f"  Mode: {mode}")
    print()

    try:
        outcome = scan_site(args.targets[0], full=args.full,
                            progress_callback=reporter.progress)
        reporter.result(_result_to_dict(outcome))

        points, grade = _calc_score(outcome.summary, outcome.findings)
        print(f"\n  Score: {grade} ({points}/100)")
        print(f"  Duration: {outcome.duration:.1f}s")
    except KeyboardInterrupt:
        abort("\n  Interrupted", "Interrupted by user")
    except Exception as exc:
        abort(f"\n  Error: {exc}", str(exc))


def _expand_cli_targets(targets):
    """Expand command-line targets into a flat list of URLs.

    A target that names an existing file is read as a URL list with one
    entry per line; blank lines and lines whose first non-blank character
    is ``#`` are skipped. Any other target is taken verbatim as a URL.
    """
    urls = []
    for target in targets:
        path = Path(target)
        if not path.is_file():
            urls.append(target)
            continue
        for raw_line in path.read_text().splitlines():
            entry = raw_line.strip()
            # Test the stripped text so indented comment lines are skipped
            # as well, instead of being scanned as if they were URLs.
            if entry and not entry.startswith("#"):
                urls.append(entry)
    return urls


def main():
    """Command-line entry point.

    Parses arguments and then either:

    * runs agent mode when both ``--token`` and ``--scan-id`` are given, or
    * scans every target in turn (targets may be URLs or files listing
      URLs), prints a report per site, optionally saves JSON/HTML reports,
      and prints a combined summary when more than one site was scanned.

    Exits with status 1 when no URLs remain after expanding the targets.
    """
    parser = argparse.ArgumentParser(
        description="Site Scanner v3.0 — website vulnerability scanner",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python scanner.py https://example.com
  python scanner.py https://site1.com https://site2.com
  python scanner.py urls.txt --full --json report.json --html report.html
  python scanner.py qpus.su --full

Agent mode (for scan.qpus.su):
  python scanner.py https://example.com --token ABC --scan-id XYZ

Standard mode (59 checks):
  SSL/TLS, HTTP/reporting headers, CSP bypass, Cookies (+scope),
  Clickjacking, CORS, HTTP methods/WebDAV, ~120 sensitive paths,
  backup files, Git exposure, source code leak, debug endpoints,
  directory listing, robots.txt, info disclosure, error handling,
  Content-Type, XSS (reflected+DOM), SQLi, NoSQLi, SSTI,
  Command Injection, LFI/RFI, Path Traversal, CRLF injection,
  Host Header injection, Open Redirect, XXE, SSRF,
  Prototype Pollution, HPP, Request Smuggling, JSONP,
  HTML analysis, secrets in JS (25+ patterns), JWT, deserialization,
  cache poisoning, IP spoofing, CSP bypass vectors,
  WAF, CMS, technologies (50+), GraphQL, OpenAPI/Swagger, WSDL,
  WebSocket, API auth, user/timing enumeration, 403 bypass,
  verb tampering, session fixation, IDOR, mass assignment,
  cloud storage, BREACH, SNI mismatch, security.txt.

Full mode (--full, +11 checks):
  Ports (60+), banner grabbing, DNS (SPF, DMARC, CAA, DNSSEC, MX, NS,
  zone transfer), email security (MTA-STS, BIMI), wildcard DNS,
  subdomains (60+), subdomain takeover, WP plugins (40+),
  password policy, rate limiting, HTTP/2.
        """
    )
    parser.add_argument("targets", nargs="+",
        help="Site URLs or file with URL list (one per line)")
    parser.add_argument("--full", action="store_true",
        help="Full scan (ports, DNS, subdomains, rate limiting)")
    parser.add_argument("--json", metavar="FILE",
        help="Save report as JSON")
    parser.add_argument("--html", metavar="FILE",
        help="Save report as HTML")
    parser.add_argument("--token",
        help="Agent token (for scan.qpus.su mode)")
    parser.add_argument("--scan-id",
        help="Scan ID (for scan.qpus.su mode)")
    parser.add_argument("--server", default="https://scan.qpus.su",
        help="Server URL (for agent mode)")

    args = parser.parse_args()

    # Agent mode — if --token and --scan-id are provided
    if args.token and args.scan_id:
        _run_agent(args)
        return

    urls = _expand_cli_targets(args.targets)
    if not urls:
        print("No URLs to scan.")
        sys.exit(1)

    print(f"\n{BOLD}Site Scanner v3.0{RESET}")
    total_std, total_full = 59, 70
    if args.full:
        mode_label = f"full ({total_full} checks)"
    else:
        mode_label = f"standard ({total_std} checks)"
    print(f"Targets: {len(urls)} site(s)  |  Mode: {mode_label}")
    if not HAS_DNS and args.full:
        print(f"{YELLOW}  [!] dnspython not installed -- DNS checks will be limited (pip install dnspython){RESET}")
    print()

    results = []
    for i, u in enumerate(urls, 1):
        print(f"{BOLD}[{i}/{len(urls)}] Scanning: {u}{RESET}")
        r = scan_site(u, full=args.full)
        results.append(r)
        print_report(r)

    if args.json:
        save_json(results, args.json)
    if args.html:
        save_html(results, args.html)

    # The combined summary is only printed for multi-site runs.
    if len(results) > 1:
        print(f"\n{BOLD}{'=' * 74}{RESET}")
        print(f"{BOLD}  Summary for {len(results)} sites:{RESET}")
        total = defaultdict(int)
        for r in results:
            for sev, cnt in r.summary.items():
                total[sev] += cnt
        # Severities with a zero total are left out of the one-line summary.
        parts = [
            f"{sev.color}{sev.value}: {total[sev.value]}{RESET}"
            for sev in Severity
            if total.get(sev.value, 0)
        ]
        print(f"  {' | '.join(parts)}")

        # Per-site summary
        print(f"\n  {BOLD}Per site:{RESET}")
        for r in results:
            s, g = _calc_score(r.summary, r.findings)
            color = GREEN if s >= 80 else YELLOW if s >= 60 else RED
            crit_high = r.summary.get("CRITICAL", 0) + r.summary.get("HIGH", 0)
            note = f"  {RED}(critical/high: {crit_high}){RESET}" if crit_high else ""
            print(f"    {color}{g}{RESET} ({s}/100) — {r.url}{note}")

        print(f"{'=' * 74}\n")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
