#!/usr/bin/env python3
"""
Cosmo Room Media Scraper

Scrapes media from the Cosmo app for tripleS and artms members.
"""

import argparse
import os
from pathlib import Path
from urllib.parse import urlparse

import requests

# ============== USER CONFIGURATION ==============
# Hardcode your default download directory here (or leave as None for current directory)
DEFAULT_DIRECTORY = None

# Hardcode your access token here (or use --token or environment variable)
COSMO_ACCESS_TOKEN = ""
# ================================================

# Member mapping: official_group_number -> (api_id, member_name)
MEMBER_MAP = {
    "tripleS": {
        1: (1, "SeoYeon"),
        2: (2, "HyeRin"),
        3: (3, "JiWoo"),
        4: (4, "ChaeYeon"),
        5: (5, "YooYeon"),
        6: (6, "SooMin"),
        7: (7, "NaKyoung"),
        8: (8, "YuBin"),
        9: (9, "Kaede"),
        10: (10, "DaHyun"),
        11: (11, "Kotone"),
        12: (12, "YeonJi"),
        13: (13, "Nien"),
        14: (14, "SoHyun"),
        15: (19, "Xinyu"),
        16: (21, "Mayu"),
        17: (22, "Lynn"),
        18: (23, "JooBin"),
        19: (24, "HaYeon"),
        20: (25, "ShiOn"),
        21: (26, "ChaeWon"),
        22: (27, "Sullin"),
        23: (28, "SeoAh"),
        24: (29, "JiYeon"),
    },
    "artms": {
        1: (15, "HeeJin"),
        2: (16, "KimLip"),
        3: (17, "JinSoul"),
        4: (18, "Choerry"),
        5: (20, "HaSeul"),
    },
}

API_BASE_URL = "https://api.cosmo.fans/bff/v3/room-posts"

# Number of posts requested per API page; also the step by which `skip` advances.
PAGE_SIZE = 10

# Seconds to wait for the server before giving up. Without a timeout,
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def get_member_info(group: str, member_input: str) -> tuple[int, str] | None:
    """
    Resolve member input (name or official number) to (api_id, member_name).

    Args:
        group: Key into MEMBER_MAP ("tripleS" or "artms").
        member_input: Official member number or member name. Names are
            matched case-insensitively; surrounding whitespace is ignored.

    Returns:
        The (api_id, member_name) tuple, or None if not found.

    Raises:
        KeyError: If group is not a key of MEMBER_MAP.
    """
    members = MEMBER_MAP[group]
    wanted = member_input.strip()

    # Try parsing as official number first
    try:
        official_num = int(wanted)
    except ValueError:
        pass
    else:
        if official_num in members:
            return members[official_num]

    # Try matching by name
    wanted_lower = wanted.lower()
    for member in members.values():
        if member[1].lower() == wanted_lower:
            return member

    return None


def get_all_members(group: str) -> list[tuple[int, str]]:
    """Get all members for a group as list of (api_id, member_name)."""
    return list(MEMBER_MAP[group].values())


def fetch_posts(token: str, artist_id: str, artist_member_id: int, skip: int = 0, take: int = 10) -> dict:
    """
    Fetch one page of posts from the Cosmo API.

    Args:
        token: Access token sent as a Bearer Authorization header.
        artist_id: Value sent as the "artistId" query parameter.
        artist_member_id: Value sent as the "artistMemberId" query parameter.
        skip: Value sent as the "skip" query parameter (paging offset).
        take: Value sent as the "take" query parameter (page size).

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "take": take,
        "kind": "post",
        "artistMemberId": artist_member_id,
        "skip": skip,
        "artistId": artist_id,
    }
    response = requests.get(
        API_BASE_URL,
        headers=headers,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()


def get_basename(url: str) -> str:
    """Extract the filename (basename) from a URL, ignoring query and fragment."""
    parsed = urlparse(url)
    return os.path.basename(parsed.path)


def download_file(url: str, dest_path: Path) -> bool:
    """
    Download a file from URL to dest_path.

    The body is streamed into a sibling "<name>.part" file and only renamed
    onto dest_path once the download has completed, so an interrupted
    transfer never leaves a truncated file under the final name.

    Returns True if successful, False otherwise.
    """
    part_path = dest_path.parent / (dest_path.name + ".part")
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            with open(part_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(part_path, dest_path)
    except requests.RequestException as e:
        print(f" Error downloading {url}: {e}")
        return False
    finally:
        # After a successful rename the .part file no longer exists, so this
        # is a no-op; on any failure it removes the partial download.
        part_path.unlink(missing_ok=True)
    return True


def scrape_member(token: str, artist_id: str, api_member_id: int, member_name: str, base_dir: Path) -> tuple[int, int]:
    """
    Scrape all media for a single member.

    Pages through the member's posts until an empty page is returned and
    downloads every media URL whose basename is not already present in
    base_dir / member_name.

    Returns (files_downloaded, files_skipped).

    Errors raised by fetch_posts are not caught here and propagate to the
    caller.
    """
    member_dir = base_dir / member_name
    member_dir.mkdir(parents=True, exist_ok=True)

    # Get existing files in member directory
    existing_files = {f.name for f in member_dir.iterdir() if f.is_file()}

    files_downloaded = 0
    files_skipped = 0
    skip = 0

    print(f"\n[{member_name}] Starting scrape...")

    while True:
        data = fetch_posts(token, artist_id, api_member_id, skip=skip, take=PAGE_SIZE)
        # `or []` also covers a key that is present but null.
        posts = data.get("posts") or []

        if not posts:
            print(" Reached end of posts.")
            break

        for post in posts:
            for media in post.get("media") or []:
                url = media.get("url")
                if not url:
                    continue

                filename = get_basename(url)
                # An empty basename (URL path ending in "/") or a dot entry
                # would resolve to a directory rather than a file.
                if not filename or filename in (".", ".."):
                    print(f" Skipping media without a usable filename: {url}")
                    continue

                if filename in existing_files:
                    print(f" Skipping existing: {filename}")
                    files_skipped += 1
                    continue

                dest_path = member_dir / filename
                print(f" Downloading: {filename}")
                if download_file(url, dest_path):
                    files_downloaded += 1
                    existing_files.add(filename)

        # Advance by the same page size that was requested.
        skip += PAGE_SIZE

    return files_downloaded, files_skipped


def main() -> int:
    """
    Command-line entry point.

    Parses arguments, resolves the token, target directory and member list,
    scrapes each member, and prints a summary.

    Returns:
        0 on success; 1 if no token or no valid members were given, or if
        fetching posts failed for at least one member.
    """
    parser = argparse.ArgumentParser(
        description="Scrape media from the Cosmo app for tripleS and artms members."
    )
    parser.add_argument(
        "group",
        choices=["tripleS", "artms"],
        help="Group name: tripleS or artms",
    )
    parser.add_argument(
        "-d", "--directory",
        help="Master directory to download images to",
    )
    parser.add_argument(
        "-m", "--members",
        help="Comma-separated list of member numbers or names (e.g., '8,19,chaewon')",
    )
    parser.add_argument(
        "-t", "--token",
        help="Cosmo access token",
    )

    args = parser.parse_args()
    artist_id = args.group

    # Determine token: command line, then hardcoded value, then environment
    token = args.token or COSMO_ACCESS_TOKEN or os.environ.get("COSMO_ACCESS_TOKEN")
    if not token:
        print("Error: Access token required. Use --token, COSMO_ACCESS_TOKEN env var, or set in script.")
        return 1

    # Determine directory: command line, then hardcoded default, then cwd
    if args.directory:
        directory = Path(args.directory)
    elif DEFAULT_DIRECTORY:
        directory = Path(DEFAULT_DIRECTORY)
    else:
        directory = Path.cwd()

    # Resolve members to scrape
    members_to_scrape = []
    if args.members:
        for member_input in args.members.split(","):
            member_input = member_input.strip()
            if not member_input:
                continue
            member_info = get_member_info(artist_id, member_input)
            if member_info:
                members_to_scrape.append(member_info)
            else:
                print(f"Warning: Unknown member '{member_input}' for group {args.group}")
    else:
        members_to_scrape = get_all_members(artist_id)

    if not members_to_scrape:
        print("Error: No valid members to scrape.")
        return 1

    print(f"Group: {artist_id}")
    print(f"Directory: {directory}")
    print(f"Members to scrape: {', '.join(name for _, name in members_to_scrape)}")

    total_downloaded = 0
    total_skipped = 0
    had_errors = False

    for api_member_id, member_name in members_to_scrape:
        try:
            downloaded, skipped = scrape_member(token, artist_id, api_member_id, member_name, directory)
        except requests.RequestException as e:
            # Report the failure and carry on with the remaining members
            # instead of aborting the whole run with a traceback.
            print(f"[{member_name}] Error fetching posts: {e}")
            had_errors = True
            continue
        total_downloaded += downloaded
        total_skipped += skipped

    print("\n=== Summary ===")
    print(f"Files downloaded: {total_downloaded}")
    print(f"Files skipped: {total_skipped}")

    return 1 if had_errors else 0


if __name__ == "__main__":
    # SystemExit works without the `exit` helper that the site module injects.
    raise SystemExit(main())