#!/usr/bin/env python3
"""
Fetch markets from Polymarket and Kalshi, match them, and generate arbitrage dashboard data
"""

import requests
import json
from datetime import datetime, timedelta, timezone
from typing import List, Dict
import os
from dotenv import load_dotenv
from difflib import SequenceMatcher

load_dotenv()

def fetch_polymarket_markets(hours=168):
    """Fetch active Polymarket markets that close within the next `hours` hours"""
    url = "https://gamma-api.polymarket.com/markets"
    
    # Use an aware UTC timestamp: the parsed end dates are timezone-aware, and
    # subtracting a naive datetime from them would raise a TypeError.
    now = datetime.now(timezone.utc)
    
    all_markets = []
    offset = 0
    limit = 100
    
    while offset < 1000:  # Cap pagination at 1,000 markets
        params = {
            "closed": "false",
            "active": "true",
            "limit": limit,
            "offset": offset
        }
        
        response = requests.get(url, params=params)
        response.raise_for_status()
        markets = response.json()
        
        if not markets:
            break
            
        for market in markets:
            try:
                end_date_str = market.get("endDate", "")
                if end_date_str:
                    end_date = datetime.fromisoformat(end_date_str.replace('Z', '+00:00'))
                    hours_until = (end_date - now).total_seconds() / 3600
                    if 0 < hours_until <= hours:
                        all_markets.append(market)
            except (ValueError, TypeError):
                # Skip markets with missing or malformed end dates
                continue
        
        offset += limit
        if len(markets) < limit:
            break
    
    return all_markets

def fetch_kalshi_markets(hours=168):
    """Fetch open Kalshi markets that close within the next `hours` hours"""
    api_key = os.getenv("KALSHI_API_KEY")
    if not api_key:
        print("Warning: KALSHI_API_KEY not found in .env")
        return []
    
    url = "https://api.elections.kalshi.com/trade-api/v2/markets"
    headers = {"Authorization": f"Bearer {api_key}"}
    
    now = datetime.now(timezone.utc)  # Aware UTC, to match the parsed close times
    
    all_markets = []
    cursor = None
    count = 0
    
    while count < 5:  # Limit to 5 pages (up to 1,000 markets)
        params = {
            "limit": 200,
            "status": "open"
        }
        if cursor:
            params["cursor"] = cursor
        
        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()
            markets = data.get("markets", [])
            
            for market in markets:
                try:
                    close_time_str = market.get("close_time", "")
                    if close_time_str:
                        close_time = datetime.fromisoformat(close_time_str.replace('Z', '+00:00'))
                        hours_until = (close_time - now).total_seconds() / 3600
                        if 0 < hours_until <= hours:
                            all_markets.append(market)
                except (ValueError, TypeError):
                    # Skip markets with missing or malformed close times
                    continue
            
            cursor = data.get("cursor")
            count += 1
            if not cursor or not markets:
                break
                
        except Exception as e:
            print(f"Error fetching Kalshi: {e}")
            break
    
    return all_markets

def similarity(a, b):
    """Calculate similarity between two strings"""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def normalize_text(text):
    """Normalize text for better matching"""
    text = text.lower()
    # Remove punctuation
    for char in '?.,!;:"\'-':
        text = text.replace(char, ' ')
    return text

def extract_keywords(text):
    """Extract important keywords from market question"""
    # Stopwords excluded from keyword matching
    common = {
        'will', 'the', 'be', 'in', 'on', 'at', 'to', 'a', 'an', 'by', 'for',
        'of', 'or', 'and', 'is', 'are', 'have', 'has', 'this', 'that', 'from',
        'with', 'as', 'it', 'its', 'vs', 'their', 'than', 'more', 'less',
        'over', 'under'
    }
    words = normalize_text(text).split()
    return [w for w in words if w not in common and len(w) > 2]

def match_markets(poly_markets, kalshi_markets, threshold=0.35):
    """Greedily pair Polymarket and Kalshi markets by text similarity and keyword overlap"""
    matches = []
    used_kalshi = set()
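    # Greedy matching: each Kalshi market can be claimed by at most one Polymarket market.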
    
    for pm in poly_markets:
        pm_question = normalize_text(pm.get("question", ""))
        pm_keywords = set(extract_keywords(pm_question))
        
        if len(pm_keywords) < 2:
            continue
        
        best_match = None
        best_score = 0
        best_km_id = None
        
        for km in kalshi_markets:
            km_id = km.get("ticker", "")
            if km_id in used_kalshi:
                continue
                
            km_title = normalize_text(km.get("title", ""))
            km_keywords = set(extract_keywords(km_title))
            
            if len(km_keywords) < 2:
                continue
            
            # Calculate keyword overlap
            common_keywords = pm_keywords & km_keywords
            if len(common_keywords) < 2:
                continue
            
            # Calculate similarity
            text_sim = similarity(pm_question, km_title)
            keyword_overlap = len(common_keywords) / max(len(pm_keywords | km_keywords), 1)
            
            # Weight both text similarity and keyword overlap
            score = (text_sim * 0.4) + (keyword_overlap * 0.6)
            
            if score > best_score and score >= threshold:
                best_score = score
                best_match = km
                best_km_id = km_id
        
        if best_match:
            matches.append({
                "polymarket": pm,
                "kalshi": best_match,
                "match_score": best_score
            })
            used_kalshi.add(best_km_id)
    
    return matches

def calculate_arbitrage(poly_price, kalshi_yes_bid, kalshi_yes_ask):
    """Calculate potential cross-platform arbitrage on the YES contract"""
    # Simplified model: buy YES where it is cheap and sell it where it is rich.
    # poly_price stands in for both the buy and sell price on Polymarket, and
    # fees, slippage, and order sizes are ignored.
    
    opportunities = []
    
    # Opportunity 1: Buy on Poly, Sell on Kalshi
    if poly_price < kalshi_yes_bid:
        profit = kalshi_yes_bid - poly_price
        opportunities.append({
            "type": "Buy Poly, Sell Kalshi",
            "profit_per_share": profit,
            "profit_pct": (profit / poly_price) * 100 if poly_price > 0 else 0
        })
    
    # Opportunity 2: Buy on Kalshi, Sell on Poly
    if kalshi_yes_ask < poly_price:
        profit = poly_price - kalshi_yes_ask
        opportunities.append({
            "type": "Buy Kalshi, Sell Poly",
            "profit_per_share": profit,
            "profit_pct": (profit / kalshi_yes_ask) * 100 if kalshi_yes_ask > 0 else 0
        })
    
    return opportunities

def prepare_dashboard_data(matches):
    """Prepare data for the HTML dashboard"""
    dashboard_data = []
    
    for match in matches:
        pm = match["polymarket"]
        km = match["kalshi"]
        
        # Parse Polymarket prices; outcomePrices is typically a JSON-encoded list,
        # and the first outcome is assumed to be Yes
        pm_prices = pm.get("outcomePrices", "[0, 0]")
        if isinstance(pm_prices, str):
            pm_prices = json.loads(pm_prices)
        pm_yes_price = float(pm_prices[0]) if len(pm_prices) > 0 else 0
        
        # Get Kalshi prices
        km_yes_bid = km.get("yes_bid", 0) / 100  # Convert cents to dollars
        km_yes_ask = km.get("yes_ask", 0) / 100
        km_no_bid = km.get("no_bid", 0) / 100
        km_no_ask = km.get("no_ask", 0) / 100
        
        # Calculate arbitrage
        arb_opportunities = calculate_arbitrage(pm_yes_price, km_yes_bid, km_yes_ask)
        
        # Get end dates
        pm_end = pm.get("endDate", "")
        km_end = km.get("close_time", "")
        
        # Calculate days until close (aware UTC now, since the parsed end date is timezone-aware)
        try:
            pm_end_dt = datetime.fromisoformat(pm_end.replace('Z', '+00:00'))
            days_until_close = (pm_end_dt - datetime.now(timezone.utc)).days
        except (ValueError, TypeError):
            days_until_close = None
        
        dashboard_data.append({
            "match_score": match["match_score"],
            "polymarket": {
                "question": pm.get("question", ""),
                "slug": pm.get("slug", ""),
                "yes_price": pm_yes_price,
                "volume": pm.get("volumeNum", 0),
                "liquidity": pm.get("liquidityNum", 0),
                "end_date": pm_end,
                "url": f"https://polymarket.com/event/{pm.get('slug', '')}"
            },
            "kalshi": {
                "title": km.get("title", ""),
                "ticker": km.get("ticker", ""),
                "yes_bid": km_yes_bid,
                "yes_ask": km_yes_ask,
                "no_bid": km_no_bid,
                "no_ask": km_no_ask,
                "volume": km.get("volume", 0),
                "open_interest": km.get("open_interest", 0),
                "close_time": km_end,
                "url": f"https://kalshi.com/markets/{km.get('ticker', '')}"
            },
            "arbitrage": arb_opportunities,
            "days_until_close": days_until_close
        })
    
    # Sort by arbitrage profit potential
    dashboard_data.sort(key=lambda x: max([o.get("profit_pct", 0) for o in x["arbitrage"]], default=0), reverse=True)
    
    return dashboard_data

def main():
    hours = 168  # 7 days
    
    print(f"Fetching Polymarket markets ending in next {hours} hours ({hours//24} days)...")
    poly_markets = fetch_polymarket_markets(hours=hours)
    print(f"Found {len(poly_markets)} Polymarket markets")
    
    print(f"\nFetching Kalshi markets ending in next {hours} hours ({hours//24} days)...")
    kalshi_markets = fetch_kalshi_markets(hours=hours)
    print(f"Found {len(kalshi_markets)} Kalshi markets")
    
    if not kalshi_markets:
        print("\nWarning: No Kalshi markets fetched. Check your API key.")
        print("Saving Polymarket data only...")
        with open("dashboard_data.json", "w") as f:
            json.dump({"polymarket_only": poly_markets[:50]}, f, indent=2)
        return
    
    print("\nMatching markets...")
    matches = match_markets(poly_markets, kalshi_markets, threshold=0.35)
    print(f"Found {len(matches)} matched pairs")
    
    print("\nPreparing dashboard data...")
    dashboard_data = prepare_dashboard_data(matches)
    
    # Save to JSON
    with open("dashboard_data.json", "w") as f:
        json.dump(dashboard_data, f, indent=2)
    
    print(f"\n✓ Saved {len(dashboard_data)} matched markets to dashboard_data.json")
    
    # Print top 5 arbitrage opportunities
    print("\nTop 5 Arbitrage Opportunities:")
    print("=" * 80)
    for i, item in enumerate(dashboard_data[:5], 1):
        if item["arbitrage"]:
            best_arb = max(item["arbitrage"], key=lambda x: x.get("profit_pct", 0))
            print(f"\n{i}. {item['polymarket']['question'][:60]}...")
            print(f"   Match Score: {item['match_score']:.2f}")
            print(f"   {best_arb['type']}")
            print(f"   Profit: {best_arb['profit_pct']:.2f}% (${best_arb['profit_per_share']:.4f} per share)")
            print(f"   Poly: ${item['polymarket']['yes_price']:.3f} | Kalshi: ${item['kalshi']['yes_bid']:.3f}/{item['kalshi']['yes_ask']:.3f}")

if __name__ == "__main__":
    main()
