#!/usr/bin/env python3
"""
Extract transcript from a YouTube video.

Usage:
    python youtube-transcript.py <youtube_url_or_id>
    python youtube-transcript.py https://www.youtube.com/watch?v=dQw4w9WgXcQ
    python youtube-transcript.py dQw4w9WgXcQ

Output:
    Prints transcript to stdout. Redirect to file if needed:
    python youtube-transcript.py <url> > transcript.md
"""

import sys
import re

try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    print("Error: youtube-transcript-api not installed")
    print("Run: pip install youtube-transcript-api")
    sys.exit(1)


def extract_video_id(url_or_id):
    """Extract the 11-character video ID from a YouTube URL or a bare ID.

    Recognised inputs:
      * a bare 11-character ID (surrounding whitespace is ignored)
      * ``youtube.com/watch?...v=<id>`` (``v`` may follow other parameters)
      * ``youtu.be/<id>``
      * ``youtube.com/{embed,shorts,live,v}/<id>`` and the
        ``youtube-nocookie.com`` equivalents

    Args:
        url_or_id: The URL or video ID supplied by the user.

    Returns:
        The video ID string, or ``None`` if the input is not a string or no
        ID could be found in it.
    """
    if not isinstance(url_or_id, str):
        return None

    candidate = url_or_id.strip()

    # Already an ID. fullmatch avoids the `$`-before-trailing-newline quirk
    # that would otherwise let "<id>\n" through with the newline attached.
    if re.fullmatch(r'[a-zA-Z0-9_-]{11}', candidate):
        return candidate

    # YouTube URL patterns
    patterns = [
        # `v` must directly follow `?` or `&`, but need not be the first param.
        r'youtube\.com/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([a-zA-Z0-9_-]{11})',
    ]

    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)

    return None


def get_transcript(video_id):
    """Fetch a video's transcript and flatten it into one line of text.

    Works with both API styles of ``youtube-transcript-api``: the
    instance-level ``fetch()`` of current releases (entries are objects with
    a ``text`` attribute) and the static ``get_transcript()`` of older
    releases (entries are dicts with a ``'text'`` key).

    Args:
        video_id: The 11-character YouTube video ID.

    Returns:
        The transcript with every entry joined by a single space and
        embedded newlines replaced by spaces. On any failure, a string of
        the form ``"Error fetching transcript: <reason>"`` is returned
        instead of raising.
    """
    try:
        # Prefer the current API; fall back to the legacy static method only
        # when the installed release does not provide fetch().
        if hasattr(YouTubeTranscriptApi, 'fetch'):
            entries = YouTubeTranscriptApi().fetch(video_id)
        else:
            entries = YouTubeTranscriptApi.get_transcript(video_id)

        # Format as readable text
        return ' '.join(
            (entry['text'] if isinstance(entry, dict) else entry.text).replace('\n', ' ')
            for entry in entries
        )

    except Exception as e:
        # Callers expect a string either way, so report the failure as text.
        return f"Error fetching transcript: {e}"


def main():
    """Command-line entry point: print a Markdown transcript for one video.

    Reads the URL or video ID from ``sys.argv[1]``. The Markdown document is
    written to stdout; usage and error messages are written to stderr so that
    redirecting stdout to a file never captures them. Exits with status 1 when
    the argument is missing, no video ID can be extracted, or the transcript
    cannot be fetched.
    """
    if len(sys.argv) < 2:
        print("Usage: python youtube-transcript.py <youtube_url_or_id>", file=sys.stderr)
        sys.exit(1)

    url_or_id = sys.argv[1]
    video_id = extract_video_id(url_or_id)

    if not video_id:
        print(f"Could not extract video ID from: {url_or_id}", file=sys.stderr)
        sys.exit(1)

    # Fetch before emitting the header so a failure leaves stdout empty.
    transcript = get_transcript(video_id)

    # get_transcript reports failures as a string with this prefix rather
    # than raising; route those to stderr and signal failure to the shell.
    if transcript.startswith("Error fetching transcript:"):
        print(transcript, file=sys.stderr)
        sys.exit(1)

    print("# YouTube Transcript\n")
    print(f"**Video ID:** {video_id}\n")
    print("---\n")
    print(transcript)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
