# Requires: pip install playwright && playwright install chromium
import asyncio
import csv

from playwright.async_api import async_playwright

async def scrape_polymarket_leaderboard():
    """Scrape Polymarket leaderboard data"""
    
    max_pages = None  # Maximum number of pages to scrape; None means scrape until pagination ends
    base_url = "https://polymarket.com/leaderboard"
    
    all_data = []
    output_file = 'polymarket_leaderboard.csv'
    fieldnames = ['rank', 'username', 'profit_loss', 'volume', 'profile_url', 'page']
    
    async with async_playwright() as p:
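        # Launch a visible, desktop-sized browser window; set headless=True for unattended runs.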
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        )
        page = await context.new_page()
        
        try:
            print(f"Navigating to {base_url}...")
            await page.goto(base_url, wait_until='networkidle', timeout=60000)
            await asyncio.sleep(2)
            
            # Click on "All" tab to get lifetime stats
            print("Clicking 'All' tab...")
            all_button = await page.query_selector('button:has-text("All")')
            if all_button:
                await all_button.click()
                await asyncio.sleep(1.5)
            else:
                print("'All' tab not found; continuing with the default time window")
            
            page_num = 1
            consecutive_failures = 0
            max_consecutive_failures = 3
            
            while True:
                if max_pages and page_num > max_pages:
                    print(f"Reached max pages limit ({max_pages})")
                    break
                
                if consecutive_failures >= max_consecutive_failures:
                    print(f"Stopping after {consecutive_failures} consecutive failures")
                    break
                
                print(f"\nScraping page {page_num}...")
                
                try:
                    # Wait for leaderboard entries to load
                    await page.wait_for_selector('div.flex.flex-col.gap-2.py-5.border-b', timeout=10000)
                    await asyncio.sleep(1)
                    
                    # Scroll to bottom to ensure pagination is visible
                    await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                    await asyncio.sleep(0.5)
                    
                    # Get all leaderboard entries
                    entries = await page.query_selector_all('div.flex.flex-col.gap-2.py-5.border-b')
                    print(f"Found {len(entries)} entries on page {page_num}")
                    
                    if len(entries) == 0:
                        print("No entries found, stopping")
                        break
                    
                    page_data = []
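                    # NOTE: the selectors below target Polymarket's Tailwind utility
                    # classes and will break if the site's markup changes.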
                    for entry in entries:
                        try:
                            # Extract rank
                            rank_el = await entry.query_selector('div.text-sm.text-text-secondary.font-medium')
                            rank = await rank_el.inner_text() if rank_el else ""
                            rank = rank.strip()
                            
                            # Extract username
                            username_el = await entry.query_selector('p.font-medium')
                            username = await username_el.inner_text() if username_el else ""
                            username = username.strip()
                            
                            # Extract profit/loss
                            profit_el = await entry.query_selector('p.text-text-primary')
                            profit_loss = await profit_el.inner_text() if profit_el else ""
                            profit_loss = profit_loss.strip()
                            
                            # Extract volume
                            volume_el = await entry.query_selector('p.text-text-secondary')
                            volume = await volume_el.inner_text() if volume_el else ""
                            volume = volume.strip()
                            
                            # Get profile URL
                            profile_link = await entry.query_selector('a[href*="/profile/"]')
                            profile_url = await profile_link.get_attribute('href') if profile_link else ""
                            if profile_url and not profile_url.startswith('http'):
                                profile_url = f"https://polymarket.com{profile_url}"
                            
                            data = {
                                'rank': rank,
                                'username': username,
                                'profit_loss': profit_loss,
                                'volume': volume,
                                'profile_url': profile_url,
                                'page': page_num
                            }
                            
                            page_data.append(data)
                            
                        except Exception as e:
                            print(f"Error extracting entry: {e}")
                            continue
                    
                    # Add page data to all_data
                    all_data.extend(page_data)
                    print(f"Collected {len(page_data)} entries from page {page_num} (Total: {len(all_data)})")
                    
                    # Save progress every 5 pages
                    if page_num % 5 == 0:
                        print(f"Saving progress... ({len(all_data)} entries)")
                        with open(output_file, 'w', newline='', encoding='utf-8') as f:
                            writer = csv.DictWriter(f, fieldnames=fieldnames)
                            writer.writeheader()
                            writer.writerows(all_data)
                    
                    # Reset failure counter on success
                    consecutive_failures = 0
                    
                    # Check for next page button
                    next_page_num = page_num + 1
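                    # The pagination control renders page numbers as clickable <li> items;
                    # find the next number, click it, then confirm the content actually changed.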
                    
                    # Store current first entry username to verify page change
                    first_entry = entries[0] if entries else None
                    current_first_username = ""
                    if first_entry:
                        username_el = await first_entry.query_selector('p.font-medium')
                        current_first_username = await username_el.inner_text() if username_el else ""
                    
                    # Pagination buttons are <li> elements
                    next_button = await page.query_selector(f'li.cursor-pointer:has-text("{next_page_num}")')
                    
                    if next_button:
                        # Click using JavaScript to avoid interception
                        await page.evaluate('(el) => el.click()', next_button)
                        await asyncio.sleep(2)
                        
                        # Wait for page to update
                        await page.wait_for_selector('div.flex.flex-col.gap-2.py-5.border-b', timeout=10000)
                        await asyncio.sleep(1)
                        
                        # Scroll to top to see new entries
                        await page.evaluate('window.scrollTo(0, 0)')
                        await asyncio.sleep(0.5)
                        
                        # Verify page changed by checking first entry
                        new_entries = await page.query_selector_all('div.flex.flex-col.gap-2.py-5.border-b')
                        if new_entries:
                            new_first_entry = new_entries[0]
                            username_el = await new_first_entry.query_selector('p.font-medium')
                            new_first_username = await username_el.inner_text() if username_el else ""
                            
                            if new_first_username == current_first_username:
                                print("Warning: Page didn't change, stopping pagination")
                                break
                        
                        page_num += 1
                    else:
                        print("No more pages found")
                        break
                
                except Exception as e:
                    print(f"Error on page {page_num}: {e}")
                    consecutive_failures += 1
                    if consecutive_failures >= max_consecutive_failures:
                        break
                    # Retry the current page after a short pause; the consecutive-failure
                    # counter ends the loop if it keeps failing
                    await asyncio.sleep(2)
            
        except Exception as e:
            print(f"Error during scraping: {e}")
            import traceback
            traceback.print_exc()
        
        finally:
            await browser.close()
    
    # Save final results to CSV
    if all_data:
        print(f"\nSaving final results: {len(all_data)} entries to {output_file}...")
        
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_data)
        
        print(f"✓ Saved to {output_file}")
        return len(all_data)
    else:
        print("No data collected")
        return 0

if __name__ == "__main__":
    total = asyncio.run(scrape_polymarket_leaderboard())
    print(f"\n{'='*50}")
    print(f"Total entries scraped: {total}")
    print(f"{'='*50}")
