#!/usr/bin/env python3
"""
PhantomBuster LinkedIn Posts Fetcher

This script fetches LinkedIn posts via PhantomBuster API and updates the
resource file at .claude/skills/copywriting-content/resources/linkedin-posts.md

Usage:
    python3 .claude/skills/copywriting-content/resources/tools/fetch_posts.py

Environment Variables:
    PHANTOMBUSTER_API_KEY - Your PhantomBuster API key (required)
    PHANTOM_AGENT_ID - ID of the LinkedIn Activity Extractor phantom
                       (optional; a built-in default ID is used when unset)
"""

import csv
import io
import json
import os
from datetime import datetime

import requests
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment before the configuration constants below are read.
load_dotenv()

# PhantomBuster API key; main() aborts early when this is unset.
API_KEY = os.getenv('PHANTOMBUSTER_API_KEY')
# Agent (phantom) ID; falls back to the hard-coded default when the
# PHANTOM_AGENT_ID environment variable is not set.
AGENT_ID = os.getenv('PHANTOM_AGENT_ID', '7605788771374762')

# S3 paths from agent configuration
# NOTE(review): these folder names are hard-coded and are not derived from
# AGENT_ID, so overriding PHANTOM_AGENT_ID does not change which result.csv
# download_csv() fetches -- confirm they match the configured agent.
ORG_S3 = "uUvNzb1u7DE"
AGENT_S3 = "W67tNcRpQKDzGGEhHJyOUA"

def get_headers():
    """Build the HTTP headers sent with PhantomBuster API requests.

    Returns:
        A new dict carrying the API key header and a JSON content type.
    """
    headers = {}
    headers["X-Phantombuster-Key-1"] = API_KEY
    headers["Content-Type"] = "application/json"
    return headers

def fetch_latest_container():
    """Get the most recent container (execution) for the agent.

    Queries the PhantomBuster ``containers/fetch-all`` endpoint for AGENT_ID
    and returns the first entry of the returned list (assumed to be the most
    recent execution -- the API ordering is not verified here).

    Returns:
        The first container entry, or None when the request fails, the
        response is not valid JSON, or no container list is available.
    """
    url = "https://api.phantombuster.com/api/v2/containers/fetch-all"
    try:
        # Let requests build/encode the query string, and never hang forever
        # on a stalled connection.
        resp = requests.get(
            url,
            headers=get_headers(),
            params={"agentId": AGENT_ID},
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"[-] Failed to fetch containers: {exc}")
        return None

    if resp.status_code != 200:
        print(f"[-] Failed to fetch containers: {resp.status_code}")
        return None

    try:
        data = resp.json()
    except ValueError:
        print("[-] Failed to fetch containers: response was not valid JSON")
        return None

    # The payload is either a bare list or a dict wrapping it under
    # 'containers'. A dict without that key has no usable list; indexing it
    # with [0] would raise KeyError, so treat it as "nothing found".
    containers = data.get('containers') if isinstance(data, dict) else data

    if not isinstance(containers, list) or not containers:
        print("[-] No containers found. Run the phantom first.")
        return None

    return containers[0]

def download_csv():
    """Download the result.csv from S3.

    The URL is built from the module-level ORG_S3 and AGENT_S3 folder names.

    Returns:
        The CSV body as text, or None when the request fails or the server
        answers with a non-200 status.
    """
    csv_url = f"https://phantombuster.s3.amazonaws.com/{ORG_S3}/{AGENT_S3}/result.csv"
    print(f"[*] Downloading CSV from: {csv_url}")

    try:
        # A timeout keeps the script from hanging on a stalled connection.
        resp = requests.get(csv_url, timeout=60)
    except requests.RequestException as exc:
        print(f"[-] Failed to download CSV: {exc}")
        return None

    if resp.status_code != 200:
        print(f"[-] Failed to download CSV: {resp.status_code}")
        return None

    # Honor a charset the server explicitly declares.
    content_type = resp.headers.get('Content-Type', '')
    if 'charset' in content_type.lower():
        return resp.text

    # Without a declared charset, requests falls back to ISO-8859-1 for
    # text/* responses, which garbles non-ASCII post content (emoji, accents).
    # Decode as UTF-8 instead (assumes the export is UTF-8 -- TODO confirm);
    # 'utf-8-sig' also drops a leading BOM that would otherwise end up in the
    # first CSV header name.
    return resp.content.decode('utf-8-sig', errors='replace')

def parse_posts(csv_content, author='Raunak Jaggi'):
    """Parse CSV content and return list of post dictionaries.

    Args:
        csv_content: Full CSV text, including the header row.
        author: Only rows whose 'author' column equals this value are kept.
            Defaults to 'Raunak Jaggi'.

    Returns:
        A list of dicts with the keys 'url', 'content', 'date', 'likes',
        'comments', 'views' and 'timestamp', one per row whose 'action'
        column is 'Post' and whose author matches.
    """
    # Feed the csv module a file-like object with newline='' so that line
    # breaks inside quoted fields (multi-line post bodies) are preserved.
    # Splitting the text with str.splitlines() first strips those line breaks
    # and silently glues the lines of a post together.
    reader = csv.DictReader(io.StringIO(csv_content, newline=''))
    posts = []

    for row in reader:
        # Only include original posts by the author (filter out
        # likes/reactions on others' posts).
        if row.get('author') != author or row.get('action') != 'Post':
            continue
        posts.append({
            'url': row.get('postUrl', ''),
            'content': row.get('postContent', ''),
            'date': row.get('postDate', 'N/A'),
            'likes': row.get('likeCount', '0'),
            'comments': row.get('commentCount', '0'),
            'views': row.get('viewCount', ''),
            'timestamp': row.get('postTimestamp', ''),
        })

    return posts

def filter_posts_after_dec_2025(posts):
    """Filter to only include posts dated on or after 1 December 2025.

    Args:
        posts: Iterable of post dicts; each may carry an ISO-8601 string
            under 'timestamp' (a trailing 'Z' is accepted).

    Returns:
        A list of the posts whose timestamp is at or after the cutoff. Posts
        whose timestamp cannot be parsed are kept; posts with a missing or
        empty timestamp are left out.
    """
    cutoff = datetime(2025, 12, 1)
    filtered = []

    for post in posts:
        ts = post.get('timestamp', '')
        if not ts:
            # No timestamp to compare against: the post is left out.
            continue

        try:
            # fromisoformat() on older Python versions rejects a bare 'Z'
            # suffix, so spell the UTC offset out explicitly.
            post_date = datetime.fromisoformat(ts.replace('Z', '+00:00'))
        except (ValueError, TypeError, AttributeError):
            # If we can't parse, include it to be safe. Only parse-related
            # errors are caught so KeyboardInterrupt/SystemExit propagate.
            filtered.append(post)
            continue

        # The offset is dropped (not converted) so the wall-clock value can be
        # compared with the naive cutoff.
        if post_date.replace(tzinfo=None) >= cutoff:
            filtered.append(post)

    return filtered

def generate_markdown(posts):
    """Render the posts as the markdown body of the resource file.

    Args:
        posts: Iterable of post dicts providing the keys 'date', 'likes',
            'comments', 'views', 'url' and 'content'.

    Returns:
        A string made of a header (title, author, today's date, filter note)
        followed by one section per post, each closed by a horizontal rule.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    sections = [f"""# LinkedIn Posts History

**Author:** Raunak Jaggi
**Last Updated:** {today}
**Filter:** Posts after December 2025 only

---

"""]

    # One markdown section per post, in the order given.
    sections.extend(
        f"""## Post: {entry['date']}
**Engagement:** 👍 {entry['likes']} | 💬 {entry['comments']} | 👀 {entry['views']}
**URL:** {entry['url']}

{entry['content']}

---

"""
        for entry in posts
    )

    return "".join(sections)

def main():
    """Fetch the latest posts and rewrite the linkedin-posts.md resource file.

    Steps: check that the API key is set, confirm the latest container has
    status 'finished', download and parse the CSV, keep the posts that pass
    the date filter, and write the generated markdown next to this script's
    parent folder. Returns None; each failure prints a message and stops
    without touching the output file.
    """
    if not API_KEY:
        print("[-] PHANTOMBUSTER_API_KEY not found in environment")
        return

    print("[*] Fetching latest container...")
    container = fetch_latest_container()

    if not container:
        return

    if container.get('status') != 'finished':
        print(f"[-] Latest container status: {container.get('status')} (not finished)")
        return

    # .get() so a container without an 'id' cannot crash a purely
    # informational log line.
    print(f"[+] Container {container.get('id')} finished")

    csv_content = download_csv()
    if not csv_content:
        return

    print("[*] Parsing posts...")
    posts = parse_posts(csv_content)
    print(f"[+] Found {len(posts)} original posts by Raunak Jaggi")

    filtered = filter_posts_after_dec_2025(posts)
    print(f"[+] {len(filtered)} posts after December 2025")

    print("[*] Generating markdown...")
    md_content = generate_markdown(filtered)

    # Output to parent resources folder
    output_path = os.path.join(os.path.dirname(__file__), '..', 'linkedin-posts.md')
    output_path = os.path.abspath(output_path)

    # Write UTF-8 explicitly: the markdown contains emoji and arbitrary post
    # text, which the platform default encoding (e.g. cp1252) may be unable
    # to encode.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(md_content)

    print(f"[+] Resource file updated: {output_path}")

# Run the fetch/update pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
