"""Download all comments from a 4chan thread and save them to a text file.

The pure helpers (extract_thread_number, format_comments) use only the
standard library; the network/HTML dependencies (requests, bs4) are imported
lazily inside get_4chan_text so the rest of the module works without them.
"""

import os
import re

# Directory where per-thread text files are written (created on demand).
SAVE_DIR = "C:\\Users\\p2\\Desktop\\4read"

# Pre-compiled patterns, hoisted so they are built once rather than per call.
_THREAD_RE = re.compile(r'/thread/(\d+)')
_URL_RE = re.compile(r'(https?://\S+)')
# MULTILINE so ">>12345" quote references on any line get separated,
# not just those at the very start of the comment.
_QUOTE_RE = re.compile(r'^(>>\d+)(.*)', re.MULTILINE)


def extract_thread_number(url):
    """Return the numeric thread id from a 4chan URL, or None if absent."""
    match = _THREAD_RE.search(url)
    return match.group(1) if match else None


def format_comments(comments):
    """Join comments into one string with URLs and >>quotes on their own lines.

    Args:
        comments: iterable of raw comment strings.
    Returns:
        Single newline-joined string; each comment is stripped of
        surrounding whitespace first.
    """
    formatted_comments = []
    for comment in comments:
        # Push URLs onto their own line so they stand out in the saved file.
        formatted = _URL_RE.sub(r'\n\1', comment)
        # Separate a ">>12345" quote reference from the sentence that follows.
        formatted = _QUOTE_RE.sub(r'\1\n\2', formatted)
        formatted_comments.append(formatted.strip())
    return "\n".join(formatted_comments)


def get_4chan_text(url):
    """Fetch a thread page and return its formatted comment text.

    Returns None on any network/parse failure; errors are printed rather
    than raised so the interactive loop in __main__ keeps running.
    """
    # Imported here so the pure helpers above remain usable without these
    # third-party packages installed.
    import requests
    from bs4 import BeautifulSoup

    try:
        # timeout fix: the original call could hang forever on a stalled
        # connection. A User-Agent avoids rejection of default client strings.
        response = requests.get(
            url,
            timeout=30,
            headers={"User-Agent": "Mozilla/5.0 (thread archiver)"},
        )
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            post_texts = soup.find_all('blockquote', class_='postMessage')
            # get_text("\n") keeps <br>-separated lines from fusing into one
            # string, which the line-oriented regexes above depend on.
            comments = [post.get_text("\n").strip() for post in post_texts]
            return format_comments(comments)
        print("Failed to fetch the URL. Status code:", response.status_code)
        return None
    except Exception as e:
        print("An error occurred:", e)
        return None


def save_to_file(text, filepath):
    """Write *text* to *filepath* as UTF-8, creating the parent directory.

    Failures are printed, not raised (best-effort, matching the rest of
    the script's error style).
    """
    try:
        parent = os.path.dirname(filepath)
        if parent:
            # Fix: the original crashed if the target folder did not exist.
            os.makedirs(parent, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(text)
        print("Text saved to:", filepath)
    except OSError as e:
        print("Failed to save text to file:", e)


if __name__ == "__main__":
    while True:
        url = input("Enter the 4chan URL (or 'exit' to quit): ")
        if url.lower() == 'exit':
            break
        thread_number = extract_thread_number(url)
        if thread_number:
            text = get_4chan_text(url)
            if text:
                save_location = os.path.join(SAVE_DIR, thread_number + ".txt")
                save_to_file(text, save_location)
        else:
            print("Invalid 4chan URL. Please make sure it contains a thread number.")