import httpx
import os
import json
import re
import asyncio
import patcher
import subprocess
import argparse
import shutil
from json import JSONDecodeError

# Scratch directory for metadata and raw downloads; fetch_meta_info() empties
# it at the start of every run.
TEMP_DIR = "temp/"
# Sub-directory of TEMP_DIR where pages are stored exactly as downloaded,
# before patch_images() processes them into the output directory.
DEST_DIR = "watermarked/"
# Browser-like User-Agent sent with the API requests.
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"}
# Session cookies sent with the catalogue and chapter-metadata requests.
# Both values are blank here; fetch_chapter_meta() reports that the cookies
# are "likely wrong or unset" when the API answers with something that is
# not JSON.
COOKIES = {
    "XSRF-TOKEN": "",
    "doujinio_powered_by_j18_session":""
}
# Matches page titles of the form "Page <number>" and captures the number.
PAGE_NAME_RE = re.compile("Page ([0-9]+)")

def clear_directory(dir_path):
    """Delete every entry inside *dir_path*, keeping the directory itself.

    Regular files and symlinks are unlinked, sub-directories are removed
    recursively. A failure on one entry is reported on stdout and does not
    stop the remaining entries from being processed. If *dir_path* does not
    exist, a notice is printed and nothing else happens.
    """
    if not os.path.exists(dir_path):
        print(f"Directory {dir_path} does not exist.")
        return

    targets = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
    for target in targets:
        try:
            unlinkable = os.path.isfile(target) or os.path.islink(target)
            if unlinkable:
                os.remove(target)
            elif os.path.isdir(target):
                shutil.rmtree(target)
            print(f"Deleted: {target}")
        except Exception as err:
            # Best-effort cleanup: report and move on to the next entry.
            print(f"Error deleting {target}: {err}")

def safe_mkdir(directory: str):
    """Create *directory* (and any missing parents); do nothing if it already exists."""
    os.makedirs(name=directory, exist_ok=True)

def extract_oid(url: str) -> str | None:
    match = re.search(r"manga/(\d+)/", url)
    return match.group(1) if match else None

def fetch_meta_info(oid: str, output_dir: str):
    """Prepare the working directories and fetch the catalogue and chapter list.

    Steps, in order:

    1. If ``doujinio.json`` is not present in the current working directory,
       request the newest-manga listing (limit 1000) and cache it there.
    2. Empty ``TEMP_DIR`` and (re)create the directory layout used by the
       later stages, plus *output_dir*.
    3. Request the chapter list for *oid* and store it as
       ``TEMP_DIR/meta/<oid>/chapters.json``.

    Args:
        oid: Manga ID as it appears in the site URL.
        output_dir: Directory that will receive the final output.

    Raises:
        httpx.HTTPStatusError: If either API request does not succeed.
    """
    if not os.path.isfile("doujinio.json"):
        response = httpx.post(
            "https://doujin.io/api/mangas/newest",
            headers=HEADERS,
            cookies=COOKIES,
            json={"limit": 1000, "offset": 0},
        )
        # The cache file is never refreshed once it exists, so an error page
        # must not be written into it.
        response.raise_for_status()
        with open("doujinio.json", "w") as f:
            f.write(response.text)

    clear_directory(TEMP_DIR)

    safe_mkdir(TEMP_DIR)
    safe_mkdir(f"{TEMP_DIR}meta")
    safe_mkdir(f"{TEMP_DIR}{DEST_DIR}")
    safe_mkdir(output_dir)
    safe_mkdir(f"{TEMP_DIR}meta/{oid}")

    response = httpx.get(f"https://doujin.io/api/chapters?manga_id={oid}", headers=HEADERS)
    # Every later stage opens chapters.json unconditionally; fail here with
    # the HTTP status rather than later with a missing-file error.
    response.raise_for_status()
    with open(f"{TEMP_DIR}meta/{oid}/chapters.json", "w") as f:
        f.write(response.text)
    print(f"Got chapter info for {oid}")


def fetch_chapter_meta():
    """Download the ``manifest`` and ``chm`` JSON documents for every chapter.

    For each manga directory under ``TEMP_DIR/meta`` the chapter list in
    ``chapters.json`` is read, and for each chapter the two documents are
    requested and stored as ``<chapter id>.manifest.json`` and
    ``<chapter id>.chm.json`` next to it. Files that already exist are not
    requested again.

    Raises:
        json.JSONDecodeError: If a response body is not JSON (a hint about
            the cookies is printed first).
        RuntimeError: If a response has a status other than 200; nothing is
            written for that response.
    """
    meta_files = os.listdir(f"{TEMP_DIR}meta")

    for i, oid in enumerate(meta_files):
        with open(f"{TEMP_DIR}meta/{oid}/chapters.json") as f:
            chapters = json.load(f)["data"]

        for chapter in chapters:
            coid = chapter["optimus_id"]
            base_url = f"https://doujin.io/api/mangas/{oid}/{coid}/"
            # Same User-Agent as every other request, plus the reader page
            # of this chapter as Referer.
            headers = {
                **HEADERS,
                "Referer": f"https://doujin.io/manga/{oid}/chapter/{coid}",
            }
            for file_type in ("manifest", "chm"):
                file_name = f"{TEMP_DIR}meta/{oid}/{coid}.{file_type}.json"
                if os.path.isfile(file_name):
                    continue

                response = httpx.get(base_url + file_type, headers=headers, cookies=COOKIES, follow_redirects=True)
                try:
                    response.json()
                except JSONDecodeError:
                    print('Error decoding JSON. Your cookies are likely wrong or unset.')
                    raise
                if response.status_code != 200:
                    # Do not store an error payload under the metadata file
                    # name: later stages would read it as real metadata.
                    raise RuntimeError(
                        f"Failed to fetch {file_type} for {oid}/{coid}: {response.status_code}"
                    )
                with open(file_name, "w") as f:
                    f.write(response.text)

        print(f"({i + 1}/{len(meta_files)}) {oid}")


def _pick_best_image_url(image: dict) -> str:
    """Return the href of the widest rendition offered for one manifest entry."""
    # Build a new list so the parsed manifest itself is left untouched.
    candidates = [
        *(image.get("alternate") or []),
        {"href": image["href"], "width": image["width"], "height": image["height"], "type": image["type"]},
    ]

    # Largest rendition seen so far, per MIME type.
    best_by_type = {}
    for form in candidates:
        mime = form["type"]
        if mime not in ("image/jpeg", "image/webp"):
            print(f"Unsupported image format {mime}. Exiting.")
            raise SystemExit(1)

        current = best_by_type.get(mime)
        if current is None or (form["width"] > current["width"] and form["height"] > current["height"]):
            best_by_type[mime] = form

    if len(best_by_type) != 2:
        print("Error: Expected two image formats.")
        raise SystemExit(1)

    jpeg = best_by_type["image/jpeg"]
    webp = best_by_type["image/webp"]
    # WebP wins ties; JPEG is chosen only when it is strictly wider.
    chosen = jpeg if jpeg["width"] > webp["width"] else webp
    return chosen["href"]


def fetch_images():
    """Turn every downloaded manifest into an ordered list of image URLs.

    For each ``*.manifest.json`` under ``TEMP_DIR/meta/<manga id>/`` the
    ``readingOrder`` entries are walked in order, entries whose href contains
    ``/lastchapter/`` or ``/lastpage/`` are skipped, the best rendition of
    each remaining page is selected, and the resulting URL list is written
    to ``<chapter id>.images.json`` in the same directory.

    Page titles must read ``Page <n>`` and be consecutive starting at
    ``Page 0``.

    Raises:
        SystemExit: With exit status 1 (after printing the reason) when a
            manifest entry has an unsupported image type, does not offer
            both JPEG and WebP, has an unexpected title, or breaks the page
            numbering.
    """
    meta_files = os.listdir(f"{TEMP_DIR}meta")

    for oid in meta_files:
        for chapter_file in os.listdir(f"{TEMP_DIR}meta/{oid}"):
            if not chapter_file.endswith(".manifest.json"):
                continue

            with open(f"{TEMP_DIR}meta/{oid}/{chapter_file}") as f:
                chapter_data = json.load(f)

            images = []
            last_page_num = -1

            for image in chapter_data["readingOrder"]:
                if "/lastchapter/" in image["href"] or "/lastpage/" in image["href"]:
                    continue

                image_url = _pick_best_image_url(image)

                title_match = PAGE_NAME_RE.fullmatch(image["title"])
                if title_match is None:
                    print(f"Unexpected page title {image['title']!r}.")
                    raise SystemExit(1)
                page_num = int(title_match.group(1))

                if page_num - 1 != last_page_num:
                    print("Page number mismatch detected.")
                    raise SystemExit(1)

                last_page_num = page_num
                images.append(image_url)

            # "<chapter id>.manifest.json" -> "<chapter id>.images.json"
            with open(f"{TEMP_DIR}meta/{oid}/{chapter_file.split('.')[0]}.images.json", "w") as f:
                json.dump(images, f)


async def download_cover(oid: str, output_path: str):
    """Download every promotional/cover image listed for a manga.

    The ``mangaads`` endpoint is queried for *oid*; every file of every type
    of every returned ad is saved as ``cover-<index>.<ext>`` (1-based index)
    inside *output_path*, which is created if needed.

    Args:
        oid: Manga ID.
        output_path: Directory that receives the cover files.

    Raises:
        httpx.HTTPStatusError: If the ``mangaads`` request does not succeed.
    """
    safe_mkdir(output_path)

    async with httpx.AsyncClient() as client:
        # Issued through the async client so the event loop is not blocked
        # while other download tasks are running.
        response = await client.post(
            "https://doujin.io/api/pricesys/mangaads",
            data={'manga_id': oid},
            headers=HEADERS,
        )
        response.raise_for_status()
        ad_data = response.json()['data']

        all_images = [
            file
            for ad in ad_data
            for ad_type in ad["types"]
            for file in ad_type["files"]
        ]

        for index, image in enumerate(all_images, 1):
            url = image["url"]
            # Drop any query string before taking the extension so it does
            # not end up in the file name.
            ext = url.split("?", 1)[0].split('.')[-1]
            target = f"{output_path}/cover-{index}.{ext}"

            print(f"Downloading cover-{index}: {target}")
            resp = await client.get(url)
            if resp.status_code != 200:
                # Same policy as for page images: report and skip rather
                # than saving an error body as an image file.
                print(f"Failed to download cover {url} for {oid}.")
                continue

            with open(target, "wb") as f:
                f.write(resp.content)


async def download_images_for_chapter(oid: str, names: dict, count: int, total: int, output_dir: str):
    """Download the covers and all page images of one manga.

    Pages are written to ``TEMP_DIR/DEST_DIR/<title>/`` (with an additional
    ``<chapter name>/`` level unless the manga is a one-shot) as
    ``000.<ext>``, ``001.<ext>``, ... Pages that already exist on disk are
    not downloaded again. Covers go to ``<output_dir>/<title>/``.

    Args:
        oid: Manga ID (name of the directory under ``TEMP_DIR/meta``).
        names: Mapping from integer manga ID to title.
        count: Not used by this function; kept for call compatibility.
        total: Not used by this function; kept for call compatibility.
        output_dir: Root of the final output directory.
    """
    with open(f"{TEMP_DIR}meta/{oid}/chapters.json") as f:
        chapters = json.load(f)["data"]

    title = names[int(oid)]
    # A single chapter named "Chapter 1" or after the manga itself is stored
    # without a per-chapter sub-directory.
    is_oneshot = len(chapters) == 1 and chapters[0]["chapter_name"] in ("Chapter 1", title)

    if chapters:
        # Covers depend only on the manga, so fetch them once instead of
        # once per chapter.
        await download_cover(oid, os.path.join(output_dir, title))

    # One client for the whole manga so connections are reused across pages.
    async with httpx.AsyncClient() as client:
        for chapter in chapters:
            coid = chapter["optimus_id"]
            chapter_path = f"{TEMP_DIR}{DEST_DIR}{title}"
            safe_mkdir(chapter_path)
            if not is_oneshot:
                chapter_path += f"/{chapter['chapter_name']}"
                safe_mkdir(chapter_path)

            print(f"Downloading images for {chapter['chapter_name']}")

            with open(f"{TEMP_DIR}meta/{oid}/{coid}.images.json") as f:
                images = json.load(f)

            for i, image_url in enumerate(images):
                ext = os.path.splitext(image_url)[1]
                filename = f"{str(i).zfill(3)}{ext}"
                fullpath = os.path.join(chapter_path, filename)
                if os.path.isfile(fullpath):
                    continue

                # Timeouts are retried indefinitely with a 2 second pause.
                resp = None
                while resp is None:
                    try:
                        resp = await client.get(image_url)
                    except httpx.TimeoutException:
                        print(f"Timeout for {oid} {fullpath} {image_url}. Retrying...")
                        await asyncio.sleep(2)

                if resp.status_code == 200:
                    with open(fullpath, "wb") as f:
                        f.write(resp.content)
                else:
                    print(f"Failed to download image {image_url} for {oid}.")


async def download_all_images(output_dir: str):
    """Download the images of every manga under ``TEMP_DIR/meta`` concurrently.

    Titles are looked up in ``doujinio.json``. One task is created per manga
    directory; at most 20 of them run their download at the same time.
    """
    limiter = asyncio.Semaphore(20)

    async def guarded(manga_id, title_by_id, position, n_total):
        # Hold a semaphore slot for the whole duration of one manga download.
        async with limiter:
            await download_images_for_chapter(manga_id, title_by_id, position, n_total, output_dir)

    with open("doujinio.json") as catalogue_file:
        catalogue = json.load(catalogue_file)["data"]

    names = {}
    for entry in catalogue:
        manga_key = entry["optimus_id"]
        names[manga_key] = entry["title"]

    manga_ids = os.listdir(f"{TEMP_DIR}meta")
    n_mangas = len(manga_ids)
    pending = [
        asyncio.create_task(guarded(manga_id, names, position, n_mangas))
        for position, manga_id in enumerate(manga_ids)
    ]

    await asyncio.gather(*pending)


def patch_images(output_dir: str):
    """Run every downloaded page through ``patcher.process_image``.

    For each manga under ``TEMP_DIR/meta`` and each of its chapters, pages
    found in ``TEMP_DIR/DEST_DIR/<title>[/<chapter name>]/`` are processed
    with the chapter's ``chmkeys`` and written under the same relative path
    in *output_dir*. Pages that were not downloaded, or whose output file
    already exists, are skipped.
    """
    with open("doujinio.json") as catalogue_file:
        catalogue = json.load(catalogue_file)["data"]

    names = {entry["optimus_id"]: entry["title"] for entry in catalogue}

    for oid in os.listdir(f"{TEMP_DIR}meta"):
        with open(f"{TEMP_DIR}meta/{oid}/chapters.json") as chapters_file:
            chapters = json.load(chapters_file)["data"]

        title = names[int(oid)]
        # A single chapter named "Chapter 1" or after the manga itself is
        # stored without a per-chapter sub-directory.
        if len(chapters) == 1:
            only_name = chapters[0]["chapter_name"]
            is_oneshot = only_name == "Chapter 1" or only_name == title
        else:
            is_oneshot = False

        print(f"Removing watermarks for {title}...")

        for chapter in chapters:
            coid = chapter["optimus_id"]

            relative_dir = title
            safe_mkdir(os.path.join(output_dir, relative_dir))
            if not is_oneshot:
                relative_dir += f"/{chapter['chapter_name']}"
                safe_mkdir(os.path.join(output_dir, relative_dir))

            with open(f"{TEMP_DIR}meta/{oid}/{coid}.images.json") as images_file:
                image_urls = json.load(images_file)

            for index, image_url in enumerate(image_urls):
                extension = os.path.splitext(image_url)[1]
                filename = f"{index:03d}{extension}"
                source = os.path.join(TEMP_DIR, DEST_DIR, relative_dir, filename)
                destination = os.path.join(output_dir, relative_dir, filename)

                if not os.path.isfile(source) or os.path.isfile(destination):
                    continue

                # NOTE(review): the key file is re-read for every page. It
                # could be loaded once per chapter if patcher.process_image
                # does not modify the keys it receives -- confirm first.
                with open(f"{TEMP_DIR}meta/{oid}/{coid}.chm.json") as keys_file:
                    chmkeys = json.load(keys_file)["chmkeys"]

                with open(source, "rb") as raw_file:
                    raw_image = raw_file.read()

                patched = patcher.process_image(raw_image, chmkeys)
                with open(destination, "wb") as out_file:
                    out_file.write(patched)


def main():
    """Command-line entry point: download one manga and write the processed pages.

    Parses the manga URL and output directory from the command line, then
    runs the pipeline stages in order: metadata, chapter metadata, image
    lists, downloads, and patching.
    """
    parser = argparse.ArgumentParser(description="doujin.io downloader.")
    parser.add_argument("url", help="The URL of the doujin to download.")
    parser.add_argument("output_dir", help="Directory to output the images, cover, and info JSON files.")
    args = parser.parse_args()

    oid = extract_oid(args.url)
    if oid is None:
        # Without an ID the pipeline would request "manga_id=None" and use a
        # "None" working directory; stop with a usage error instead.
        parser.error(f"Could not find a manga ID in URL: {args.url}")

    fetch_meta_info(oid, args.output_dir)
    fetch_chapter_meta()
    fetch_images()
    asyncio.run(download_all_images(args.output_dir))
    patch_images(args.output_dir)


# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
