#!/usr/bin/env python3
"""
TheOldLLM Proxy - Combined Server
With proper streaming support (client stays open)
"""

import json
import uuid
import time
import logging
import traceback
from typing import Optional, List, Dict, AsyncGenerator
from pathlib import Path
from datetime import datetime

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse, Response, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

# === Logging ===
# Root logger emits INFO and above, prefixed with a short HH:MM:SS timestamp.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# === Configuration ===
PORT = 8001
TOKEN_FILE = Path("current_token.txt")  # relative path: resolved against the working directory
DEFAULT_CHAT_MODEL = "ent-gpt-4o"
UPSTREAM_ORIGIN = "https://theoldllm.vercel.app"

# === Global State ===
# In-memory copy of the token and the time it was last refreshed; both start unset.
token_updated_at: Optional[datetime] = None
current_token: Optional[str] = None

# === Models (GPT and Claude only) ===
# One row per model: (id, display name, llmVersion).
_MODEL_ROWS = [
    # GPT-5 Series
    ("ent-gpt-5.2", "GPT-5.2", "gpt-5.2"),
    ("ent-gpt-5.1", "GPT-5.1", "gpt-5.1"),
    ("ent-gpt-5", "GPT-5", "gpt-5"),
    ("ent-gpt-5-mini", "GPT-5 Mini", "gpt-5-mini"),
    ("ent-gpt-5-nano", "GPT-5 Nano", "gpt-5-nano"),

    # O-Series
    ("ent-o4-mini", "O4 Mini", "o4-mini"),
    ("ent-o3", "O3", "o3"),
    ("ent-o3-mini", "O3 Mini", "o3-mini"),
    ("ent-o1", "O1", "o1"),
    ("ent-o1-preview", "O1 Preview", "o1-preview"),
    ("ent-o1-mini", "O1 Mini", "o1-mini"),

    # GPT-4 Series
    ("ent-gpt-4.1", "GPT-4.1", "gpt-4.1"),
    ("ent-gpt-4o", "GPT-4o", "gpt-4o"),
    ("ent-gpt-4o-2024-08-06", "GPT-4o (2024-08-06)", "gpt-4o-2024-08-06"),
    ("ent-gpt-4o-mini", "GPT-4o Mini", "gpt-4o-mini"),
    ("ent-gpt-4-turbo", "GPT-4 Turbo", "gpt-4-turbo"),
    ("ent-gpt-4-turbo-preview", "GPT-4 Turbo Preview", "gpt-4-turbo-preview"),
    ("ent-gpt-4", "GPT-4", "gpt-4"),
    ("ent-gpt-4-1106-preview", "GPT-4 1106 Preview", "gpt-4-1106-preview"),
    ("ent-gpt-4-vision-preview", "GPT-4 Vision Preview", "gpt-4-vision-preview"),
    ("ent-gpt-4-0613", "GPT-4 (0613)", "gpt-4-0613"),

    # GPT-3.5 Series
    ("ent-gpt-3.5-turbo", "GPT-3.5 Turbo", "gpt-3.5-turbo"),
    ("ent-gpt-3.5-turbo-0125", "GPT-3.5 Turbo (0125)", "gpt-3.5-turbo-0125"),
    ("ent-gpt-3.5-turbo-16k", "GPT-3.5 Turbo 16K", "gpt-3.5-turbo-16k"),

    # Claude Opus 4.5
    ("ent-claude-opus-4.5", "Claude Opus 4.5", "claude-opus-4-5"),
    ("ent-claude-opus-4.5-20251101", "Claude Opus 4.5 (20251101)", "claude-opus-4-5-20251101"),

    # Claude Opus 4.1
    ("ent-claude-opus-4.1", "Claude Opus 4.1", "claude-opus-4-1"),
    ("ent-claude-opus-4.1-20250805", "Claude Opus 4.1 (20250805)", "claude-opus-4-1-20250805"),

    # Claude Opus 4
    ("ent-claude-opus-4", "Claude Opus 4", "claude-opus-4-20250514"),
    ("ent-claude-4-opus", "Claude 4 Opus", "claude-4-opus-20250514"),

    # Claude Sonnet 4.5
    ("ent-claude-sonnet-4.5", "Claude Sonnet 4.5", "claude-sonnet-4-5"),
    ("ent-claude-sonnet-4.5-20250929", "Claude Sonnet 4.5 (20250929)", "claude-sonnet-4-5-20250929"),

    # Claude Sonnet 4
    ("ent-claude-sonnet-4", "Claude Sonnet 4", "claude-sonnet-4-20250514"),
    ("ent-claude-4-sonnet", "Claude 4 Sonnet", "claude-4-sonnet-20250514"),

    # Claude 3.7 Sonnet
    ("ent-claude-3.7-sonnet", "Claude 3.7 Sonnet", "claude-3-7-sonnet-latest"),
    ("ent-claude-3.7-sonnet-20250219", "Claude 3.7 Sonnet (20250219)", "claude-3-7-sonnet-20250219"),

    # Claude 3.5 Sonnet
    ("ent-claude-3.5-sonnet", "Claude 3.5 Sonnet", "claude-3-5-sonnet-latest"),
    ("ent-claude-3.5-sonnet-20241022", "Claude 3.5 Sonnet (20241022)", "claude-3-5-sonnet-20241022"),
    ("ent-claude-3.5-sonnet-20240620", "Claude 3.5 Sonnet (20240620)", "claude-3-5-sonnet-20240620"),

    # Claude Haiku 4.5
    ("ent-claude-haiku-4.5", "Claude Haiku 4.5", "claude-haiku-4-5"),
    ("ent-claude-haiku-4.5-20251001", "Claude Haiku 4.5 (20251001)", "claude-haiku-4-5-20251001"),

    # Claude 3.5 Haiku
    ("ent-claude-3.5-haiku", "Claude 3.5 Haiku", "claude-3-5-haiku-latest"),
    ("ent-claude-3.5-haiku-20241022", "Claude 3.5 Haiku (20241022)", "claude-3-5-haiku-20241022"),

    # Claude 3 Opus
    ("ent-claude-3-opus", "Claude 3 Opus", "claude-3-opus-latest"),
    ("ent-claude-3-opus-20240229", "Claude 3 Opus (20240229)", "claude-3-opus-20240229"),

    # Claude 3 Haiku
    ("ent-claude-3-haiku", "Claude 3 Haiku", "claude-3-haiku-20240307"),
]

# Expand the rows into dicts keyed "id" / "name" / "llmVersion", in that key order.
ALL_MODELS = [
    {"id": model_id, "name": display_name, "llmVersion": llm_version}
    for model_id, display_name, llm_version in _MODEL_ROWS
]


# === Token Management ===
def load_token_from_file() -> Optional[str]:
    """Load the token from ``TOKEN_FILE`` into the module-level cache.

    The file content is stripped and prefixed with ``"Bearer "`` when the prefix
    is missing. On success ``current_token`` is set to the token and
    ``token_updated_at`` to the file's modification time.

    Returns:
        The normalized token, or ``None`` when the file is missing, empty, or
        cannot be read (read errors are logged, not raised).
    """
    global current_token, token_updated_at
    try:
        if not TOKEN_FILE.exists():
            return None
        token = TOKEN_FILE.read_text(encoding="utf-8").strip()
        if not token:
            return None
        if not token.startswith("Bearer "):
            token = f"Bearer {token}"
        # Resolve the timestamp before touching globals so a failing stat()
        # cannot leave the token set while the function reports failure.
        updated_at = datetime.fromtimestamp(TOKEN_FILE.stat().st_mtime)
        current_token = token
        token_updated_at = updated_at
        # The token is a credential: log only a short prefix, never most of it.
        logger.info(f"📂 Loaded token: {token[:15]}...")
        return token
    except Exception as e:
        logger.error(f"❌ Token load error: {e}")
    return None


def save_token(token: str, source: str = "unknown") -> bool:
    """Persist a token to ``TOKEN_FILE`` and to the module-level cache.

    The token is stripped and prefixed with ``"Bearer "`` when the prefix is
    missing. On success ``current_token`` and ``token_updated_at`` are updated.

    Args:
        token: Raw token text, with or without the ``"Bearer "`` prefix.
        source: Free-form label naming where the token came from; only logged.

    Returns:
        ``True`` when the token was written, ``False`` when it was empty or any
        error occurred (errors are logged, not raised).
    """
    global current_token, token_updated_at
    try:
        token = token.strip()
        if not token:
            return False
        if not token.startswith("Bearer "):
            token = f"Bearer {token}"
        TOKEN_FILE.write_text(token, encoding="utf-8")
        current_token = token
        token_updated_at = datetime.now()
        # The token is a credential: log only a short prefix, never most of it.
        logger.info(f"✅ Token saved ({source}): {token[:15]}...")
        return True
    except Exception as e:
        logger.error(f"❌ Token save error: {e}")
        return False


def get_token() -> Optional[str]:
    """Return the cached token, or try loading it from the token file if none is cached."""
    return current_token or load_token_from_file()


def has_valid_token() -> bool:
    """Report whether a token is available and longer than 20 characters."""
    candidate = get_token()
    if candidate is None:
        return False
    return len(candidate) > 20


# === Headers ===
def get_headers(token: str) -> Dict[str, str]:
    """Build the browser-style request headers, carrying *token* as ``authorization``.

    Pairs are listed in the order the headers should appear in the resulting dict.
    """
    header_pairs = [
        ("Host", "theoldllm.vercel.app"),
        ("connection", "keep-alive"),
        ("pragma", "no-cache"),
        ("cache-control", "no-cache"),
        ("sec-ch-ua", '"Chromium";v="137", "Not/A)Brand";v="24"'),
        ("sec-ch-ua-mobile", "?0"),
        ("sec-ch-ua-platform", '"Linux"'),
        ("dnt", "1"),
        ("upgrade-insecure-requests", "1"),
        (
            "user-agent",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
        ),
        ("accept", "*/*"),
        ("content-type", "application/json"),
        ("sec-fetch-site", "same-origin"),
        ("sec-fetch-mode", "cors"),
        ("sec-fetch-dest", "empty"),
        ("referer", "https://theoldllm.vercel.app/"),
        ("origin", "https://theoldllm.vercel.app"),
        ("accept-language", "en-US,en;q=0.9"),
        ("authorization", token),
        ("priority", "u=1, i"),
    ]
    return dict(header_pairs)


# === Helpers ===
def convert_messages(messages: List[Dict]) -> str:
    """Flatten OpenAI-style chat messages into a single prompt string.

    Each message becomes ``"<Role>: <text>"`` (role capitalized, defaulting to
    ``"user"`` when absent or null) and messages are joined by a blank line.

    Content handling:
      * a string is used as-is;
      * ``None`` (e.g. ``"content": null``) becomes an empty string instead of
        the literal text ``"None"``;
      * a list of content parts contributes its ``{"type": "text"}`` parts (and
        any bare strings), joined by newlines; non-text parts are skipped;
      * anything else is converted with ``str()``.

    Entries that are not dicts are skipped.

    Args:
        messages: Sequence of message dicts with ``role`` and ``content`` keys.

    Returns:
        The combined prompt, or ``""`` when *messages* is empty or ``None``.
    """
    if not messages:
        return ""
    parts = []
    for msg in messages:
        if not isinstance(msg, dict):
            continue

        role = msg.get("role", "user")
        if role is None:
            role = "user"
        role = str(role).capitalize()

        content = msg.get("content")
        if content is None:
            text = ""
        elif isinstance(content, list):
            fragments = []
            for part in content:
                if isinstance(part, str):
                    fragments.append(part)
                elif isinstance(part, dict) and part.get("type") == "text":
                    fragments.append(str(part.get("text") or ""))
            text = "\n".join(fragments)
        else:
            text = str(content)

        parts.append(f"{role}: {text}")
    return "\n\n".join(parts)


def get_model_config(model_id: str) -> Dict[str, str]:
    """Resolve *model_id* to its llmVersion and provider name.

    Ids not listed in ``ALL_MODELS`` are passed through unchanged as the version.
    The provider is "Anthropic" when the version contains "claude"
    (case-insensitive) and "OpenAI" otherwise.
    """
    version = model_id
    for entry in ALL_MODELS:
        if entry["id"] == model_id:
            version = entry.get("llmVersion", model_id)
            break

    if "claude" in version.lower():
        provider = "Anthropic"
    else:
        provider = "OpenAI"
    return {"version": version, "provider": provider}


# === FastAPI App ===
app = FastAPI(version="3.3.0", title="TheOldLLM Proxy")

# Fully permissive CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination the FastAPI CORS docs advise against — confirm it is intended.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)


# === Ignore favicon requests ===
@app.get("/favicon.ico")
async def favicon():
    """Reply to favicon requests with a body-less 204."""
    no_content = Response(status_code=204)
    return no_content


@app.options("/{path:path}")
async def options_handler(path: str):
    """Answer OPTIONS on any path with 200 and permissive CORS headers."""
    cors_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
        "Access-Control-Allow-Headers": "*",
    }
    return Response(status_code=200, headers=cors_headers)


# === Dashboard ===
@app.get("/")
async def dashboard():
    """Render the HTML status dashboard.

    Shows whether a token is available (present and longer than 20 characters),
    a preview of its first 60 characters, how long ago it was updated, the local
    API base URL and a sample streaming ``curl`` command. The page reloads
    itself every 15 seconds.

    Returns:
        An ``HTMLResponse`` with the rendered page.
    """
    # Function-scope import keeps this block self-contained (standard library).
    from html import escape

    token = get_token()
    has_token = token is not None and len(token) > 20
    # The token is supplied through the unauthenticated /receive-token endpoint,
    # so it is untrusted text: escape it before interpolating into the page.
    token_preview = escape(token[:60]) + "..." if token else "None"
    token_age = ""
    if token_updated_at:
        age_seconds = (datetime.now() - token_updated_at).total_seconds()
        if age_seconds < 60:
            token_age = f"{int(age_seconds)}s ago"
        elif age_seconds < 3600:
            token_age = f"{int(age_seconds/60)}m ago"
        else:
            token_age = f"{int(age_seconds/3600)}h ago"

    status_class = "ok" if has_token else "error"
    status_text = "✅ Token Available" if has_token else "❌ No Token"

    # Literal braces in the CSS/JSON below are doubled because this is an f-string.
    return HTMLResponse(f"""
<!DOCTYPE html>
<html>
<head>
    <title>TheOldLLM Proxy - Port {PORT}</title>
    <meta charset="utf-8">
    <link rel="icon" href="data:,">
    <style>
        * {{ box-sizing: border-box; }}
        body {{ 
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 900px; margin: 0 auto; padding: 20px;
            background: #0d1117; color: #c9d1d9;
        }}
        h1 {{ color: #58a6ff; margin-bottom: 5px; }}
        .subtitle {{ color: #8b949e; margin-bottom: 30px; }}
        .card {{
            background: #161b22; border: 1px solid #30363d;
            border-radius: 8px; padding: 20px; margin: 15px 0;
        }}
        .card h3 {{ color: #58a6ff; margin-top: 0; }}
        .status {{ padding: 15px; border-radius: 6px; font-weight: 500; }}
        .status.ok {{ background: #238636; color: white; }}
        .status.error {{ background: #da3633; color: white; }}
        .token-preview {{
            font-family: monospace; font-size: 11px;
            background: #0d1117; padding: 12px; border-radius: 4px;
            word-break: break-all; margin-top: 10px; border: 1px solid #30363d;
        }}
        pre {{
            background: #0d1117; border: 1px solid #30363d;
            padding: 15px; border-radius: 6px; overflow-x: auto;
            font-size: 12px; color: #7ee787;
        }}
        button {{
            padding: 10px 20px; border: none; border-radius: 6px;
            cursor: pointer; font-weight: 500; margin-right: 10px; margin-top: 10px;
        }}
        .btn-primary {{ background: #238636; color: white; }}
        .btn-secondary {{ background: #30363d; color: #c9d1d9; }}
    </style>
</head>
<body>
    <h1>🔐 TheOldLLM Proxy</h1>
    <p class="subtitle">OpenAI-compatible proxy (GPT & Claude models)</p>
    
    <div class="card">
        <h3>Token Status</h3>
        <div class="status {status_class}">{status_text}</div>
        <div class="token-preview">
            <strong>Token:</strong> {token_preview}<br>
            <strong>Updated:</strong> {token_age or 'Never'}
        </div>
        <button class="btn-primary" onclick="window.open('https://theoldllm.vercel.app','_blank')">🌐 Open Target Site</button>
        <button class="btn-secondary" onclick="location.reload()">🔄 Refresh</button>
    </div>
    
    <div class="card">
        <h3>API Endpoint</h3>
        <pre>http://localhost:{PORT}/v1</pre>
    </div>
    
    <div class="card">
        <h3>Test (Streaming)</h3>
        <pre>curl http://localhost:{PORT}/v1/chat/completions \\
  -H "Content-Type: application/json" \\
  -d '{{
    "model": "ent-gpt-4o",
    "messages": [{{"role": "user", "content": "Count 1 to 10"}}],
    "stream": true
  }}'</pre>
    </div>
    
    <script>setTimeout(() => location.reload(), 15000);</script>
</body>
</html>
    """)


# === Token Endpoints ===
@app.post("/receive-token")
async def receive_token_endpoint(request: Request):
    """Accept a token pushed by the TamperMonkey script and store it.

    Accepted request bodies:
      * a JSON object ``{"token": "...", "source": "..."}`` (``source`` is
        optional and defaults to ``"http"``);
      * a JSON string, taken as the token itself;
      * any non-JSON body, decoded as UTF-8 and taken as the token.

    Responses (all carry ``Access-Control-Allow-Origin: *``):
      * 200 ``{"status": "ok"}`` when the token was saved;
      * 400 ``{"error": "No token"}`` when no usable string token was supplied;
      * 500 when saving failed or an unexpected error occurred.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    def reply(content: dict, status_code: int = 200) -> JSONResponse:
        # Every response from this endpoint carries the same CORS header.
        return JSONResponse(content=content, status_code=status_code, headers=cors_headers)

    try:
        try:
            data = await request.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; catching only those
            # (instead of a bare except) lets cancellation propagate.
            body = await request.body()
            data = {"token": body.decode('utf-8')}

        if isinstance(data, str):
            # A bare JSON string is the token itself.
            data = {"token": data}
        if not isinstance(data, dict):
            return reply({"error": "No token"}, 400)

        token = data.get("token", "")
        source = data.get("source", "http")

        # A non-string token cannot be stored; reject it as a client error
        # rather than letting it surface as a misleading 500 "Save failed".
        if not token or not isinstance(token, str):
            return reply({"error": "No token"}, 400)

        if save_token(token, source):
            return reply({"status": "ok"})
        return reply({"error": "Save failed"}, 500)
    except Exception as e:
        return reply({"error": str(e)}, 500)


@app.post("/internal/update-token")
async def update_token_endpoint(request: Request):
    """Alias route: hand the request to ``receive_token_endpoint`` unchanged."""
    result = await receive_token_endpoint(request)
    return result


@app.get("/token-status")
async def token_status_endpoint():
    """Report as JSON whether a token is held, plus a 50-character preview of it."""
    token = get_token()
    is_valid = has_valid_token()
    preview = f"{token[:50]}..." if token else None
    payload = {"has_token": is_valid, "token_preview": preview}
    return JSONResponse(content=payload, headers={"Access-Control-Allow-Origin": "*"})


# === OpenAI Endpoints ===
@app.get("/v1/models")
async def list_models():
    """Return every entry of ``ALL_MODELS`` as an OpenAI-style model list."""
    entries = []
    for model in ALL_MODELS:
        entries.append({
            "id": model["id"],
            "object": "model",
            "created": int(time.time()),
            "owned_by": "theoldllm-proxy",
            "name": model["name"],
        })
    payload = {"object": "list", "data": entries}
    return JSONResponse(content=payload, headers={"Access-Control-Allow-Origin": "*"})


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Handle an OpenAI-style chat completion request.

    Reads ``model`` (default ``DEFAULT_CHAT_MODEL``), ``stream`` (default false)
    and ``messages`` (default empty) from the JSON body, resolves the model via
    ``get_model_config`` and dispatches to the streaming or non-streaming path.

    Responses:
      * 400 when the body is not valid JSON, is not a JSON object, or has a
        ``model``/``messages`` field of the wrong type;
      * 401 when no token is available;
      * otherwise a ``StreamingResponse`` (``text/event-stream``) when
        ``stream`` is truthy, else the result of ``non_streaming_request``.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    def bad_request(message: str) -> JSONResponse:
        # Shared shape for every 400 this endpoint returns.
        return JSONResponse(
            content={"error": {"message": message, "type": "invalid_request_error"}},
            status_code=400, headers=cors_headers
        )

    try:
        body = await request.json()
    except Exception:
        # Not a bare except: cancellation must be allowed to propagate.
        return bad_request("Invalid JSON")

    if not isinstance(body, dict):
        return bad_request("Request body must be a JSON object")

    # `or` also covers an explicit null, which .get()'s default would not.
    user_model = body.get("model") or DEFAULT_CHAT_MODEL
    if not isinstance(user_model, str):
        return bad_request("'model' must be a string")

    is_stream = bool(body.get("stream", False))

    messages = body.get("messages") or []
    if not isinstance(messages, list):
        return bad_request("'messages' must be a list")

    logger.info(f"📨 Request: model={user_model}, stream={is_stream}, messages={len(messages)}")

    token = get_token()
    if not token:
        return JSONResponse(
            content={"error": {"message": "No token available. Visit target site first.", "type": "authentication_error"}},
            status_code=401, headers=cors_headers
        )

    config = get_model_config(user_model)
    actual_model = config["version"]
    provider = config["provider"]

    logger.info(f"   Model: {actual_model}, Provider: {provider}")

    if is_stream:
        return StreamingResponse(
            full_streaming_request(token, actual_model, provider, messages, user_model),
            media_type="text/event-stream",
            headers={
                "Access-Control-Allow-Origin": "*",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            }
        )
    else:
        return await non_streaming_request(token, actual_model, provider, messages, user_model)


async def full_streaming_request(
    token: str,
    actual_model: str,
    provider: str,
    messages: List[Dict],
    user_model: str
) -> AsyncGenerator[str, None]:
    """Run the upstream persona -> session -> message flow and stream the reply.

    Yields Server-Sent-Event strings (``data: <json>\\n\\n``) shaped as
    ``chat.completion.chunk`` objects, one per upstream ``message_delta`` with
    non-empty content, followed by a final chunk with ``finish_reason="stop"``
    and ``data: [DONE]``.

    Failures never raise to the caller: they are reported in-band as a single
    content chunk ``[Error: ...]`` followed by ``data: [DONE]``.

    Args:
        token: Value sent as the ``authorization`` header.
        actual_model: Model version placed in the persona payload.
        provider: Provider name placed in the persona payload.
        messages: OpenAI-style messages, flattened with ``convert_messages``.
        user_model: Model id echoed back in every emitted chunk.
    """
    chunk_id = f"chatcmpl-{uuid.uuid4()}"
    created = int(time.time())

    def make_chunk(content: str, finish_reason: Optional[str] = None) -> str:
        """Format one SSE event; an empty *content* produces an empty delta."""
        chunk = {
            "id": chunk_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": user_model,
            "choices": [{
                "index": 0,
                "delta": {"content": content} if content else {},
                "finish_reason": finish_reason
            }]
        }
        return f"data: {json.dumps(chunk)}\n\n"

    def make_error(msg: str) -> str:
        """Format an in-band error chunk followed by the stream terminator."""
        return make_chunk(f"[Error: {msg}]") + "data: [DONE]\n\n"

    def extract_delta(raw_line: str):
        """Return the content of a ``message_delta`` line, or ``None``.

        Blank lines, lines that are not valid JSON, non-object JSON, other
        event types and empty content all yield ``None``.
        """
        raw_line = raw_line.strip()
        if not raw_line:
            return None
        try:
            data = json.loads(raw_line)
        except json.JSONDecodeError:
            return None
        if not isinstance(data, dict):
            return None
        obj = data.get("obj", {})
        if isinstance(obj, dict) and obj.get("type") == "message_delta":
            return obj.get("content", "") or None
        return None

    async with httpx.AsyncClient(timeout=300.0) as client:
        headers = get_headers(token)

        try:
            # Step 1: Create Persona
            logger.info("[1/3] Creating persona...")
            persona_payload = {
                "name": f"{actual_model} Agent v{int(time.time()) % 10000}",
                "description": f"Direct chat with {provider}",
                "system_prompt": "You are a helpful assistant.",
                "task_prompt": "",
                "llm_model_provider_override": provider,
                "llm_model_version_override": actual_model,
                "tool_ids": [],
                "is_public": False,
                "include_citations": False,
                "num_chunks": 0,
                "datetime_aware": False,
                "llm_filter_extraction": False,
                "llm_relevance_filter": False,
                "document_set_ids": [],
                "recency_bias": "no_decay",
            }

            persona_resp = await client.post(
                f"{UPSTREAM_ORIGIN}/sv5/persona",
                headers=headers,
                json=persona_payload
            )

            if persona_resp.status_code == 403:
                logger.error("   ❌ 403 Forbidden")
                yield make_error("Token expired (403). Refresh target site.")
                return

            if persona_resp.status_code != 200:
                logger.error(f"   ❌ Persona failed: {persona_resp.status_code}")
                yield make_error(f"Persona failed: {persona_resp.status_code}")
                return

            try:
                persona_data = persona_resp.json()
                if not isinstance(persona_data, dict):
                    yield make_error(f"Invalid persona response type: {type(persona_data)}")
                    return
                persona_id = persona_data.get("id")
                if not persona_id:
                    yield make_error("No persona ID in response")
                    return
            except Exception as e:
                yield make_error(f"Persona parse error: {e}")
                return

            logger.info(f"   ✅ Persona: {persona_id}")

            # Step 2: Create Session
            logger.info("[2/3] Creating session...")
            session_resp = await client.post(
                f"{UPSTREAM_ORIGIN}/sv5/chat/create-chat-session",
                headers=headers,
                json={"persona_id": persona_id, "description": "Chat"}
            )

            if session_resp.status_code != 200:
                logger.error(f"   ❌ Session failed: {session_resp.status_code}")
                yield make_error(f"Session failed: {session_resp.status_code}")
                return

            try:
                session_data = session_resp.json()
                if not isinstance(session_data, dict):
                    yield make_error(f"Invalid session response type: {type(session_data)}")
                    return
                session_id = session_data.get("chat_session_id")
                if not session_id:
                    yield make_error("No session ID in response")
                    return
            except Exception as e:
                yield make_error(f"Session parse error: {e}")
                return

            logger.info(f"   ✅ Session: {session_id}")

            # Step 3: Send Message and Stream Response
            logger.info("[3/3] Sending message and streaming...")
            prompt = convert_messages(messages)
            msg_payload = {
                "chat_session_id": session_id,
                "parent_message_id": None,
                "message": prompt,
                "file_descriptors": [],
                "search_doc_ids": [],
                "retrieval_options": {},
            }

            chunk_count = 0

            async with client.stream(
                "POST",
                f"{UPSTREAM_ORIGIN}/sv5/chat/send-message",
                headers=headers,
                json=msg_payload,
                timeout=300.0
            ) as response:

                if response.status_code != 200:
                    logger.error(f"   ❌ Message failed: {response.status_code}")
                    yield make_error(f"Message failed: {response.status_code}")
                    return

                # Upstream sends newline-delimited JSON; network chunks can split
                # a line, so accumulate text and only parse complete lines.
                buffer = ""

                async for raw_chunk in response.aiter_text():
                    buffer += raw_chunk

                    while "\n" in buffer:
                        line, buffer = buffer.split("\n", 1)
                        content = extract_delta(line)
                        if content:
                            chunk_count += 1
                            yield make_chunk(content)

                # Process remaining buffer (a final line without a trailing newline).
                # Parsing happens in extract_delta, so this yield is not wrapped
                # in a catch-all handler that could swallow generator shutdown
                # when the client disconnects.
                content = extract_delta(buffer)
                if content:
                    chunk_count += 1
                    yield make_chunk(content)

            yield make_chunk("", "stop")
            yield "data: [DONE]\n\n"

            logger.info(f"   ✅ Stream complete ({chunk_count} chunks)")

        except httpx.TimeoutException:
            logger.error("   ❌ Timeout")
            yield make_error("Request timeout")
        except Exception as e:
            logger.error(f"   ❌ Error: {e}")
            logger.error(traceback.format_exc())
            yield make_error(str(e))


async def non_streaming_request(
    token: str,
    actual_model: str,
    provider: str,
    messages: List[Dict],
    user_model: str
) -> JSONResponse:
    """Run the upstream persona -> session -> message flow and return one reply.

    The upstream message response is read in full, parsed as newline-delimited
    JSON, and the string content of every ``message_delta`` object is
    concatenated into a single assistant message.

    Args:
        token: Value sent as the ``authorization`` header.
        actual_model: Model version placed in the persona payload.
        provider: Provider name placed in the persona payload.
        messages: OpenAI-style messages, flattened with ``convert_messages``.
        user_model: Model id echoed back in the response.

    Returns:
        A ``chat.completion`` JSON response on success (usage counts are always
        zero), or an ``{"error": {...}}`` JSON response with status 403 (upstream
        403 on persona creation), 504 (timeout) or 500 (any other failure).
        Nothing is raised to the caller.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    def fail(message: str, err_type: str = "api_error", status_code: int = 500) -> JSONResponse:
        # Single place that builds the error envelope used by every failure path.
        return JSONResponse(
            content={"error": {"message": message, "type": err_type}},
            status_code=status_code, headers=cors_headers
        )

    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            headers = get_headers(token)

            # Step 1: Create Persona
            logger.info("[1/3] Creating persona...")
            persona_payload = {
                "name": f"{actual_model} Agent v{int(time.time()) % 10000}",
                "description": f"Direct chat with {provider}",
                "system_prompt": "You are a helpful assistant.",
                "task_prompt": "",
                "llm_model_provider_override": provider,
                "llm_model_version_override": actual_model,
                "tool_ids": [],
                "is_public": False,
                "include_citations": False,
                "num_chunks": 0,
                "datetime_aware": False,
                "llm_filter_extraction": False,
                "llm_relevance_filter": False,
                "document_set_ids": [],
                "recency_bias": "no_decay",
            }

            persona_resp = await client.post(
                f"{UPSTREAM_ORIGIN}/sv5/persona",
                headers=headers,
                json=persona_payload
            )

            if persona_resp.status_code == 403:
                return fail("Token expired (403)", "authentication_error", 403)

            if persona_resp.status_code != 200:
                return fail(f"Persona failed: {persona_resp.status_code}")

            persona_data = persona_resp.json()
            if not isinstance(persona_data, dict):
                return fail("Invalid persona response")

            persona_id = persona_data.get("id")
            if not persona_id:
                return fail("No persona ID")

            logger.info(f"   ✅ Persona: {persona_id}")

            # Step 2: Create Session
            logger.info("[2/3] Creating session...")
            session_resp = await client.post(
                f"{UPSTREAM_ORIGIN}/sv5/chat/create-chat-session",
                headers=headers,
                json={"persona_id": persona_id, "description": "Chat"}
            )

            if session_resp.status_code != 200:
                return fail(f"Session failed: {session_resp.status_code}")

            session_data = session_resp.json()
            if not isinstance(session_data, dict):
                return fail("Invalid session response")

            session_id = session_data.get("chat_session_id")
            if not session_id:
                return fail("No session ID")

            logger.info(f"   ✅ Session: {session_id}")

            # Step 3: Send Message
            logger.info("[3/3] Sending message...")
            prompt = convert_messages(messages)
            msg_payload = {
                "chat_session_id": session_id,
                "parent_message_id": None,
                "message": prompt,
                "file_descriptors": [],
                "search_doc_ids": [],
                "retrieval_options": {},
            }

            msg_resp = await client.post(
                f"{UPSTREAM_ORIGIN}/sv5/chat/send-message",
                headers=headers,
                json=msg_payload
            )

            if msg_resp.status_code != 200:
                return fail(f"Message failed: {msg_resp.status_code}")

            # Parse response: one JSON object per line; keep message_delta text.
            pieces = []
            for line in msg_resp.text.split("\n"):
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Skip lines that are not valid JSON.
                    continue
                if not isinstance(data, dict):
                    continue
                obj = data.get("obj", {})
                if isinstance(obj, dict) and obj.get("type") == "message_delta":
                    content = obj.get("content", "")
                    # Non-string content (e.g. null) cannot be concatenated; skip it.
                    if isinstance(content, str):
                        pieces.append(content)
            full_content = "".join(pieces)

            logger.info(f"   ✅ Response: {len(full_content)} chars")

            return JSONResponse(content={
                "id": f"chatcmpl-{uuid.uuid4()}",
                "object": "chat.completion",
                "created": int(time.time()),
                "model": user_model,
                "choices": [{"index": 0, "message": {"role": "assistant", "content": full_content}, "finish_reason": "stop"}],
                "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
            }, headers=cors_headers)

    except httpx.TimeoutException:
        logger.error("   ❌ Timeout")
        return fail("Timeout", "timeout_error", 504)
    except Exception as e:
        logger.error(f"❌ Error: {e}")
        # Include the traceback, matching what the streaming path logs.
        logger.error(traceback.format_exc())
        return fail(str(e), "internal_error", 500)


@app.on_event("startup")
async def startup():
    """Log the startup banner, then load the token file and report the result."""
    banner_rule = "=" * 50
    logger.info(banner_rule)
    logger.info("  TheOldLLM Proxy v3.3.0 (GPT & Claude)")
    logger.info(f"  Port: {PORT}")
    logger.info(f"  API: http://localhost:{PORT}/v1")
    logger.info(banner_rule)

    load_token_from_file()
    if not has_valid_token():
        logger.warning("⚠️ No token - visit target site")
        return
    logger.info("✅ Token loaded")


if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces at the configured port.
    uvicorn.run(app, port=PORT, host="0.0.0.0")