#!/usr/bin/env python3
"""
mkmd5.py
Recursively find platform zip files and generate MD5 checksums for their contents.
Reads zip contents directly without extracting to disk.

Usage: python3 mkmd5.py [directory] [--dry-run]
       If no directory is specified, current directory is used.
"""

import sys
import hashlib
import zipfile
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock

# Archive base names to search for; each entry is matched as "<name>.zip".
PLATFORMS = ["Linux", "Mac", "Windows"]
# Read size used when hashing archive members, so a member is consumed
# piecewise instead of in a single read.
CHUNK_SIZE = 1024 * 1024  # 1 MiB per read

# Held around every print() in safe_print so messages from worker threads
# are emitted one at a time.
print_lock = Lock()


def safe_print(msg: str, error: bool = False):
    """Print ``msg`` while holding the module-wide print lock.

    Args:
        msg: Text to print.
        error: When true the message goes to stderr, otherwise to stdout.
    """
    print_lock.acquire()
    try:
        stream = sys.stderr if error else sys.stdout
        print(msg, file=stream)
    finally:
        # Always release, even if print() raises (e.g. a closed stream).
        print_lock.release()


def get_md5_streaming(zf: zipfile.ZipFile, filename: str) -> str:
    md5 = hashlib.md5()
    with zf.open(filename) as f:
        while chunk := f.read(CHUNK_SIZE):
            md5.update(chunk)
    return md5.hexdigest()


def generate_checksums(zip_path: Path, platform: str, dry_run: bool) -> bool:
    """Write ``checksum-<platform>.txt`` next to ``zip_path``.

    The output holds one ``<md5>  <member name>`` line per non-directory
    member of the archive, sorted by member name. Members are hashed straight
    from the archive; nothing is extracted to disk.

    Args:
        zip_path: Archive to read.
        platform: Platform label used in the checksum file name.
        dry_run: When true, the archive is still verified and hashed, but no
            file is written.

    Returns:
        True on success (or a successful dry run). False if the archive is
        corrupt, is not a zip file, or any other error occurred; the problem
        is reported on stderr rather than raised.
    """
    checksum_file = zip_path.parent / f"checksum-{platform}.txt"
    tmp_file = checksum_file.with_suffix(".txt.tmp")

    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            # testzip() returns the name of the first member that fails its
            # CRC/header check, or None when every member is intact.
            bad_file = zf.testzip()
            if bad_file:
                safe_print(f"Error: '{zip_path}' is corrupted at '{bad_file}'", error=True)
                return False

            files = sorted(
                (i for i in zf.infolist() if not i.is_dir()),
                key=lambda x: x.filename
            )
            entries = [
                f"{get_md5_streaming(zf, info.filename)}  {info.filename}"
                for info in files
            ]

        # The archive is closed here; everything needed is in ``entries``.
        if dry_run:
            safe_print(f"Would create: {checksum_file} ({len(entries)} files)")
            return True

        # Explicit UTF-8 so non-ASCII member names do not depend on the
        # locale's default encoding.
        tmp_file.write_text("\n".join(entries) + "\n", encoding="utf-8")
        # replace() overwrites an existing checksum file on every platform;
        # rename() raises FileExistsError on Windows when the target exists.
        tmp_file.replace(checksum_file)
        safe_print(f"Created: {checksum_file}")
        return True

    except zipfile.BadZipFile:
        safe_print(f"Error: '{zip_path}' is not a valid zip file", error=True)
        return False
    except Exception as e:
        safe_print(f"Error processing '{zip_path}': {e}", error=True)
        # Best-effort removal of a partially written temp file. A failure
        # here is reported but must not escape and mask the original error.
        try:
            tmp_file.unlink(missing_ok=True)
        except OSError as cleanup_error:
            safe_print(
                f"Warning: could not remove '{tmp_file}': {cleanup_error}",
                error=True,
            )
        return False


def create_checksum_symlink(zip_path: Path, platform: str, target_platform: str, dry_run: bool) -> bool:
    """Point ``checksum-<platform>.txt`` at ``checksum-<target_platform>.txt``.

    The link is created in the directory of ``zip_path`` and its target is a
    bare relative file name, so it resolves within that same directory.

    Args:
        zip_path: Zip file whose directory receives the link.
        platform: Platform label used for the link's file name.
        target_platform: Platform label used for the link target's file name.
        dry_run: When true, only report what would be linked.

    Returns:
        True if the link was created (or would be, in a dry run); False if
        an error occurred, in which case it is reported on stderr.
    """
    link_path = zip_path.parent / f"checksum-{platform}.txt"
    link_target = Path(f"checksum-{target_platform}.txt")

    try:
        if not dry_run:
            # is_symlink() is checked as well because exists() is False for
            # a dangling link, which still has to be removed first.
            already_present = link_path.is_symlink() or link_path.exists()
            if already_present:
                link_path.unlink()
            link_path.symlink_to(link_target)
            safe_print(f"Linked: {link_path} -> {link_target}")
        else:
            safe_print(f"Would link: {link_path} -> {link_target}")
    except Exception as exc:
        safe_print(f"Error creating symlink '{link_path}': {exc}", error=True)
        return False
    return True


def main():
    """Command-line entry point.

    Parses ``[directory] [--dry-run]`` from ``sys.argv``, recursively
    collects ``<Platform>.zip`` files under the directory, generates checksum
    files for regular zips in a thread pool, and then creates checksum
    symlinks for zips that are themselves symlinks.

    Exits with status 1 if the directory does not exist or if any archive
    failed to process. Finding no archives at all is reported but is not
    treated as an error.
    """
    args = sys.argv[1:]
    dry_run = "--dry-run" in args
    args = [a for a in args if a != "--dry-run"]

    search_dir = Path(args[0]) if args else Path(".")

    if not search_dir.is_dir():
        safe_print(f"Error: Directory '{search_dir}' does not exist", error=True)
        sys.exit(1)

    if dry_run:
        safe_print("=== DRY RUN - no files will be modified ===\n")

    real_zips: list[tuple[Path, str]] = []
    symlink_zips: list[tuple[Path, str]] = []

    for platform in PLATFORMS:
        for zip_path in search_dir.rglob(f"{platform}.zip"):
            if zip_path.is_symlink():
                symlink_zips.append((zip_path, platform))
            else:
                real_zips.append((zip_path, platform))

    total = len(real_zips) + len(symlink_zips)
    if total == 0:
        # Only say "not found" when nothing matched; archives that were
        # found but failed are reported separately below.
        safe_print(f"No platform zip files found in '{search_dir}'")
        safe_print(f"Looking for: {', '.join(p + '.zip' for p in PLATFORMS)}")
        return

    succeeded = 0
    # ThreadPoolExecutor rejects max_workers=0, hence the "or 1" fallback
    # when only symlinked zips were found.
    max_workers = min(8, len(real_zips)) or 1

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(generate_checksums, zp, pl, dry_run)
            for zp, pl in real_zips
        ]
        for future in as_completed(futures):
            if future.result():
                succeeded += 1

    # Symlinks are handled after the real archives have been processed.
    for zip_path, platform in symlink_zips:
        try:
            # NOTE(review): only the stem of the link target is used, so the
            # checksum link assumes the target zip sits in the same directory
            # as the symlink - confirm this holds for the release layout.
            target_platform = zip_path.readlink().stem
        except OSError as e:
            safe_print(f"Error reading symlink '{zip_path}': {e}", error=True)
            continue
        if create_checksum_symlink(zip_path, platform, target_platform, dry_run):
            succeeded += 1

    failed = total - succeeded
    if failed:
        safe_print(
            f"Done. Processed {succeeded} archive(s); {failed} failed.",
            error=True,
        )
        sys.exit(1)

    safe_print(f"Done. Processed {succeeded} archive(s).")


# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()
