#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import HTMLParser
import httplib
import netrc
import os
import re
import socket
import time
import urllib
import urllib2
import email.utils
import xml.etree.ElementTree
from urlparse import parse_qs
try:
	import cStringIO as StringIO
except ImportError:
	import StringIO
from utils import *
class InfoExtractor(object):
	"""Base class for information extractors.

	An information extractor is a class that, given a URL, extracts
	information from the video (or videos) that URL refers to: the real
	video URL, the video title and simplified title, the author and
	others. The information is stored in a dictionary which is then
	passed to the FileDownloader, which processes it, possibly
	downloading the video to the file system, among other possible
	outcomes. Every dictionary must include the following fields:

	id:		Video identifier.
	url:		Final video URL.
	uploader:	Nickname of the video uploader.
	title:		Literal title.
	ext:		Video filename extension.
	format:		Video format.
	player_url:	SWF Player URL (may be None).

	The fields below are optional. Their primary purpose is to let
	youtube-dl serve as the backend for a video search function, such as
	the one in youtube2mp3. They are only used when their respective
	forced printing functions are called:

	thumbnail:	Full URL to a video thumbnail image.
	description:	One-line video description.

	Subclasses should re-define _real_initialize() and _real_extract()
	and define a _VALID_URL regexp. They should probably also be added
	to the list of extractors.
	"""

	# Class-level defaults; __init__ sets both again on every instance.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor and attach the optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	def suitable(self, url):
		"""Return True if url matches this extractor's _VALID_URL."""
		matched = re.match(self._VALID_URL, url)
		if matched is None:
			return False
		return True

	def initialize(self):
		"""Run _real_initialize() (authentication, etc) unless already done."""
		if self._ready:
			return
		self._real_initialize()
		# Only reached when _real_initialize() returned without raising.
		self._ready = True

	def extract(self, url):
		"""Initialize if needed and return the info extracted for url.

		The result is whatever _real_extract() returns, which is meant to
		be a list of dicts.
		"""
		self.initialize()
		info = self._real_extract(url)
		return info

	def set_downloader(self, downloader):
		"""Attach downloader to this extractor."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Hook for the actual initialization. Redefine in subclasses."""
		return None

	def _real_extract(self, url):
		"""Hook for the actual extraction. Redefine in subclasses."""
		return None
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""
	# URLs accepted by this extractor. Group 1 is the optional URL prefix:
	# an optional http(s) scheme, then youtu.be/ or a youtube.com /
	# youtube-nocookie.com host (with optional subdomain), then optionally
	# a v/, embed/ or e/ path or a watch-style "...v=" query. The negative
	# lookahead rejects paths that start with view_play_list, my_playlists,
	# artist or playlist. Group 2 is the following run of [0-9A-Za-z_-]
	# characters (presumably the video id). Since group 1 is optional, a
	# bare id with no prefix matches too; the conditional (?(1).+)? allows
	# trailing text after the id only when a prefix was matched.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	# NOTE(review): the three URLs below are presumably requested while
	# setting the language, logging in and confirming age (compare the
	# report_lang/report_login/report_age_confirmation messages); the code
	# that uses them is not visible here -- confirm.
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	# Captures the value of a next_url query parameter (up to the next '&').
	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	# NOTE(review): presumably the machine name looked up in the user's
	# .netrc for credentials -- confirm against the login code.
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
	# Same format codes as above, but each webm code (43-46, see
	# _video_extensions) is placed ahead of its neighbour from the list above.
	_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
	# Format code -> filename extension. Codes '5', '6', '34' and '35' have
	# no entry here, so lookups for them need a fallback.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'44': 'webm',
		'45': 'webm',
		'46': 'webm',
	}
	# Format code -> dimensions string; '???' marks an unknown size.
	# NOTE(review): values look like HEIGHTxWIDTH (e.g. '720x1280') -- confirm.
	_video_dimensions = {
		'5': '240x400',
		'6': '???',
		'13': '???',
		'17': '144x176',
		'18': '360x640',
		'22': '720x1280',
		'34': '360x640',
		'35': '480x854',
		'37': '1080x1920',
		'38': '3072x4096',
		'43': '360x640',
		'44': '480x854',
		'45': '720x1280',
		'46': '1080x1920',
	}	
	# Name of this extractor; matches the '[youtube]' prefix of the
	# messages printed by the report_* methods.
	IE_NAME = u'youtube'
	def report_lang(self):
		"""Report, via the downloader's to_screen(), that the language is being set."""
		notify = self._downloader.to_screen
		notify(u'[youtube] Setting language')
	def report_login(self):
		"""Report, via the downloader's to_screen(), that a login is being attempted."""
		notify = self._downloader.to_screen
		notify(u'[youtube] Logging in')
	def report_age_confirmation(self):
		"""Report, via the downloader's to_screen(), that age is being confirmed."""
		notify = self._downloader.to_screen
		notify(u'[youtube] Confirming age')
	def report_video_webpage_download(self, video_id):
		"""Report that the webpage of video_id is being downloaded."""
		notify = self._downloader.to_screen
		notify(u'[youtube] %s: Downloading video webpage' % video_id)
	def report_video_info_webpage_download(self, video_id):
		"""Report that the video info webpage of video_id is being downloaded."""
		notify = self._downloader.to_screen
		notify(u'[youtube] %s: Downloading video info webpage' % video_id)
	def report_video_subtitles_download(self, video_id):
		"""Report that the subtitles of video_id are being downloaded."""
		notify = self._downloader.to_screen
		notify(u'[youtube] %s: Downloading video subtitles' % video_id)
	def report_information_extraction(self, video_id):
		"""Report that information about video_id is being extracted."""
		notify = self._downloader.to_screen
		notify(u'[youtube] %s: Extracting video information' % video_id)
	def report_unavailable_format(self, video_id, format):
		"""Report that the given format is not available for video_id."""
		notify = self._downloader.to_screen
		details = (video_id, format)
		notify(u'[youtube] %s: Format %s not available' % details)
	def report_rtmp_download(self):
		"""Report that an RTMP download was detected."""
		notify = self._downloader.to_screen
		notify(u'[youtube] RTMP download detected')
	def _closed_captions_xml_to_srt(self, xml_string):
		srt = ''
		texts = re.findall(r'