"""Download all comments from a 4chan thread and save them to a text file.

The pure helpers (extract_thread_number, format_comments) use only the
standard library; the network/HTML dependencies (requests, bs4) are imported
lazily inside get_4chan_text so the rest of the module works without them.
"""

import os
import re

# Directory where per-thread text files are written (created on demand).
SAVE_DIR = "C:\\Users\\p2\\Desktop\\4read"

# Pre-compiled patterns, hoisted so they are built once rather than per call.
_THREAD_RE = re.compile(r'/thread/(\d+)')
_URL_RE = re.compile(r'(https?://\S+)')
# MULTILINE so ">>12345" quote references on any line get separated,
# not just those at the very start of the comment.
_QUOTE_RE = re.compile(r'^(>>\d+)(.*)', re.MULTILINE)


def extract_thread_number(url):
    """Return the numeric thread id from a 4chan URL, or None if absent."""
    match = _THREAD_RE.search(url)
    return match.group(1) if match else None


def format_comments(comments):
    """Join comments into one string with URLs and >>quotes on their own lines.

    Args:
        comments: iterable of raw comment strings.
    Returns:
        Single newline-joined string; each comment is stripped of
        surrounding whitespace first.
    """
    formatted_comments = []
    for comment in comments:
        # Push URLs onto their own line so they stand out in the saved file.
        formatted = _URL_RE.sub(r'\n\1', comment)
        # Separate a ">>12345" quote reference from the sentence that follows.
        formatted = _QUOTE_RE.sub(r'\1\n\2', formatted)
        formatted_comments.append(formatted.strip())
    return "\n".join(formatted_comments)


def get_4chan_text(url):
    """Fetch a thread page and return its formatted comment text.

    Returns None on any network/parse failure; errors are printed rather
    than raised so the interactive loop in __main__ keeps running.
    """
    # Imported here so the pure helpers above remain usable without these
    # third-party packages installed.
    import requests
    from bs4 import BeautifulSoup

    try:
        # timeout fix: the original call could hang forever on a stalled
        # connection. A User-Agent avoids rejection of default client strings.
        response = requests.get(
            url,
            timeout=30,
            headers={"User-Agent": "Mozilla/5.0 (thread archiver)"},
        )
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            post_texts = soup.find_all('blockquote', class_='postMessage')
            # get_text("\n") keeps <br>-separated lines from fusing into one
            # string, which the line-oriented regexes above depend on.
            comments = [post.get_text("\n").strip() for post in post_texts]
            return format_comments(comments)
        print("Failed to fetch the URL. Status code:", response.status_code)
        return None
    except Exception as e:
        print("An error occurred:", e)
        return None


def save_to_file(text, filepath):
    """Write *text* to *filepath* as UTF-8, creating the parent directory.

    Failures are printed, not raised (best-effort, matching the rest of
    the script's error style).
    """
    try:
        parent = os.path.dirname(filepath)
        if parent:
            # Fix: the original crashed if the target folder did not exist.
            os.makedirs(parent, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(text)
        print("Text saved to:", filepath)
    except OSError as e:
        print("Failed to save text to file:", e)


if __name__ == "__main__":
    while True:
        url = input("Enter the 4chan URL (or 'exit' to quit): ")
        if url.lower() == 'exit':
            break
        thread_number = extract_thread_number(url)
        if thread_number:
            text = get_4chan_text(url)
            if text:
                save_location = os.path.join(SAVE_DIR, thread_number + ".txt")
                save_to_file(text, save_location)
        else:
            print("Invalid 4chan URL. Please make sure it contains a thread number.")