diff options
| -rw-r--r-- | LATEST_VERSION | 2 | ||||
| -rw-r--r-- | Makefile | 20 | ||||
| -rw-r--r-- | README.md | 108 | ||||
| -rwxr-xr-x[-rw-r--r--] | youtube-dl | 1757 | 
4 files changed, 1401 insertions, 486 deletions
| diff --git a/LATEST_VERSION b/LATEST_VERSION index 295c9c4fa..35b79da09 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.03.29 +2011.09.15 diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..e725dc720 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +default: update + +update: update-readme update-latest + +update-latest: +	./youtube-dl --version > LATEST_VERSION + +update-readme: +	@options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ +		header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ +		footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \ +		echo "$${header}" > README.md && \ +		echo -e '\n## OPTIONS' >> README.md && \ +		echo "$${options}" >> README.md&& \ +		echo -e '\n## FAQ' >> README.md && \ +		echo "$${footer}" >> README.md + + + +.PHONY: default update update-latest update-readme diff --git a/README.md b/README.md new file mode 100644 index 000000000..9fdd7d3e2 --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# youtube-dl + +## USAGE +youtube-dl [options] url [url...] + +## DESCRIPTION +**youtube-dl** is a small command-line program to download videos from +YouTube.com and a few more sites. It requires the Python interpreter, version +2.x (x being at least 5), and it is not platform specific. It should work in +your Unix box, in Windows or in Mac OS X. It is released to the public domain, +which means you can modify it, redistribute it or use it however you like. + +## OPTIONS +    -h, --help               print this help text and exit +    -v, --version            print program version and exit +    -U, --update             update this program to latest version +    -i, --ignore-errors      continue on download errors +    -r, --rate-limit LIMIT   download rate limit (e.g. 50k or 44.6m) +    -R, --retries RETRIES    number of retries (default is 10) +    --dump-user-agent        display the current browser identification + +### Video Selection: +    --playlist-start NUMBER  playlist video to start at (default is 1) +    --playlist-end NUMBER    playlist video to end at (default is last) +    --match-title REGEX      download only matching titles (regex or caseless +                             sub-string) +    --reject-title REGEX     skip download for matching titles (regex or +                             caseless sub-string) + +### Filesystem Options: +    -t, --title              use title in file name +    -l, --literal            use literal title in file name +    -A, --auto-number        number downloaded files starting from 00000 +    -o, --output TEMPLATE    output filename template +    -a, --batch-file FILE    file containing URLs to download ('-' for stdin) +    -w, --no-overwrites      do not overwrite files +    -c, --continue           resume partially downloaded files +    --cookies FILE           file to dump cookie jar to +    --no-part                do not use .part files +    --no-mtime               do not use the Last-modified header to set the file +                             modification time +    --write-description      write video description to a .description file +    --write-info-json        write video metadata to a .info.json file + +### Verbosity / Simulation Options: +    -q, --quiet              activates quiet mode +    -s, --simulate           do not download video +    -g, --get-url            simulate, quiet but print URL +    -e, --get-title          simulate, quiet but print title +    --get-thumbnail          simulate, quiet but print thumbnail URL +    --get-description        simulate, quiet but print video description +    --get-filename           simulate, quiet but print output filename +    --no-progress            do not print progress bar +    --console-title          display progress in console titlebar + +### Video Format Options: +    -f, --format FORMAT      video format code +    --all-formats            download all available video formats +    --max-quality FORMAT     highest quality format to download + +### Authentication Options: +    -u, --username USERNAME  account username +    -p, --password PASSWORD  account password +    -n, --netrc              use .netrc authentication data + +### Post-processing Options: +    --extract-audio          convert video files to audio-only files (requires +                             ffmpeg and ffprobe) +    --audio-format FORMAT    "best", "aac" or "mp3"; best by default + +## FAQ + +### Can you please put the -b option back? + +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. + +### I get HTTP error 402 when trying to download a video. What's this? + +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. + +### I have downloaded a video but how can I play it? + +Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). + +### The links provided by youtube-dl -g are not working anymore + +The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. + +### ERROR: no fmt_url_map or conn information found in video info + +youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. + +## COPYRIGHT + +youtube-dl is released into the public domain by the copyright holders. + +This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain. + +## BUGS + +Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> + +Please include: + +* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. +* The output of `youtube-dl --version` +* The output of `python --version` +* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). diff --git a/youtube-dl b/youtube-dl index fd2edef6f..a2100aa6d 100644..100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,19 +1,29 @@  #!/usr/bin/env python  # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# License: Public domain code + +__author__  = ( +	'Ricardo Garcia Gonzalez', +	'Danny Colligan', +	'Benjamin Johnson', +	'Vasyl\' Vavrychuk', +	'Witold Baryluk', +	'Paweł Paprota', +	'Gergely Imreh', +	'Rogério Brito', +	'Philipp Hagemeister', +	'Sören Schulze', +	) + +__license__ = 'Public Domain' +__version__ = '2011.09.15' + +UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' +  import cookielib -import ctypes  import datetime -import email.utils  import gzip  import htmlentitydefs +import HTMLParser  import httplib  import locale  import math @@ -23,22 +33,44 @@ import os.path  import re  import socket  import string -import StringIO  import subprocess  import sys  import time  import urllib  import urllib2 +import warnings  import zlib +if os.name == 'nt': +	import ctypes + +try: +	import email.utils +except ImportError: # Python 2.4 +	import email.Utils +try: +	import cStringIO as StringIO +except ImportError: +	import StringIO +  # parse_qs was moved from the cgi module to the urlparse module recently.  try:  	from urlparse import parse_qs  except ImportError:  	from cgi import parse_qs +try: +	import lxml.etree +except ImportError: +	pass # Handled below + +try: +	import xml.etree.ElementTree +except ImportError: # Python<2.5: Not officially supported, but let it slip +	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') +  std_headers = { -	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', +	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',  	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',  	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  	'Accept-Encoding': 'gzip, deflate', @@ -47,6 +79,119 @@ std_headers = {  simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: +	import json +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): +	import re +	class json(object): +		@staticmethod +		def loads(s): +			s = s.decode('UTF-8') +			def raiseError(msg, i): +				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) +			def skipSpace(i, expectMore=True): +				while i < len(s) and s[i] in ' \t\r\n': +					i += 1 +				if expectMore: +					if i >= len(s): +						raiseError('Premature end', i) +				return i +			def decodeEscape(match): +				esc = match.group(1) +				_STATIC = { +					'"': '"', +					'\\': '\\', +					'/': '/', +					'b': unichr(0x8), +					'f': unichr(0xc), +					'n': '\n', +					'r': '\r', +					't': '\t', +				} +				if esc in _STATIC: +					return _STATIC[esc] +				if esc[0] == 'u': +					if len(esc) == 1+4: +						return unichr(int(esc[1:5], 16)) +					if len(esc) == 5+6 and esc[5:7] == '\\u': +						hi = int(esc[1:5], 16) +						low = int(esc[7:11], 16) +						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) +				raise ValueError('Unknown escape ' + str(esc)) +			def parseString(i): +				i += 1 +				e = i +				while True: +					e = s.index('"', e) +					bslashes = 0 +					while s[e-bslashes-1] == '\\': +						bslashes += 1 +					if bslashes % 2 == 1: +						e += 1 +						continue +					break +				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') +				stri = rexp.sub(decodeEscape, s[i:e]) +				return (e+1,stri) +			def parseObj(i): +				i += 1 +				res = {} +				i = skipSpace(i) +				if s[i] == '}': # Empty dictionary +					return (i+1,res) +				while True: +					if s[i] != '"': +						raiseError('Expected a string object key', i) +					i,key = parseString(i) +					i = skipSpace(i) +					if i >= len(s) or s[i] != ':': +						raiseError('Expected a colon', i) +					i,val = parse(i+1) +					res[key] = val +					i = skipSpace(i) +					if s[i] == '}': +						return (i+1, res) +					if s[i] != ',': +						raiseError('Expected comma or closing curly brace', i) +					i = skipSpace(i+1) +			def parseArray(i): +				res = [] +				i = skipSpace(i+1) +				if s[i] == ']': # Empty array +					return (i+1,res) +				while True: +					i,val = parse(i) +					res.append(val) +					i = skipSpace(i) # Raise exception if premature end +					if s[i] == ']': +						return (i+1, res) +					if s[i] != ',': +						raiseError('Expected a comma or closing bracket', i) +					i = skipSpace(i+1) +			def parseDiscrete(i): +				for k,v in {'true': True, 'false': False, 'null': None}.items(): +					if s.startswith(k, i): +						return (i+len(k), v) +				raiseError('Not a boolean (or null)', i) +			def parseNumber(i): +				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) +				if mobj is None: +					raiseError('Not a number', i) +				nums = mobj.group(1) +				if '.' in nums or 'e' in nums or 'E' in nums: +					return (i+len(nums), float(nums)) +				return (i+len(nums), int(nums)) +			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} +			def parse(i): +				i = skipSpace(i) +				i,res = CHARMAP.get(s[i], parseNumber)(i) +				i = skipSpace(i, False) +				return (i,res) +			i,res = parse(0) +			if i < len(s): +				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') +			return res +  def preferredencoding():  	"""Get preferred encoding. @@ -63,6 +208,7 @@ def preferredencoding():  			yield pref  	return yield_preferredencoding().next() +  def htmlentity_transform(matchobj):  	"""Transforms an HTML entity to a Unicode character. @@ -89,11 +235,13 @@ def htmlentity_transform(matchobj):  	# Unknown entity in name, return its literal representation  	return (u'&%s;' % entity) +  def sanitize_title(utitle):  	"""Sanitizes a video title so it could be used as part of a filename."""  	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)  	return utitle.replace(unicode(os.sep), u'%') +  def sanitize_open(filename, open_mode):  	"""Try to open the given filename, and slightly tweak it if this fails. @@ -120,13 +268,15 @@ def sanitize_open(filename, open_mode):  		stream = open(filename, open_mode)  		return (stream, filename) +  def timeconvert(timestr): -    """Convert RFC 2822 defined time string into system timestamp""" -    timestamp = None -    timetuple = email.utils.parsedate_tz(timestr) -    if timetuple is not None: -        timestamp = email.utils.mktime_tz(timetuple) -    return timestamp +	"""Convert RFC 2822 defined time string into system timestamp""" +	timestamp = None +	timetuple = email.utils.parsedate_tz(timestr) +	if timetuple is not None: +		timestamp = email.utils.mktime_tz(timetuple) +	return timestamp +  class DownloadError(Exception):  	"""Download Error exception. @@ -137,6 +287,7 @@ class DownloadError(Exception):  	"""  	pass +  class SameFileError(Exception):  	"""Same File exception. @@ -145,6 +296,7 @@ class SameFileError(Exception):  	"""  	pass +  class PostProcessingError(Exception):  	"""Post Processing exception. @@ -153,6 +305,7 @@ class PostProcessingError(Exception):  	"""  	pass +  class UnavailableVideoError(Exception):  	"""Unavailable Format exception. @@ -161,6 +314,7 @@ class UnavailableVideoError(Exception):  	"""  	pass +  class ContentTooShortError(Exception):  	"""Content Too Short exception. @@ -176,6 +330,7 @@ class ContentTooShortError(Exception):  		self.downloaded = downloaded  		self.expected = expected +  class YoutubeDLHandler(urllib2.HTTPHandler):  	"""Handler for HTTP requests and responses. @@ -185,11 +340,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  	a particular request, the original request in the program code only has  	to include the HTTP header "Youtubedl-No-Compression", which will be  	removed before making the real request. -	 +  	Part of this code was copied from: -	  http://techknack.net/python-urllib2-handlers/ -	   +	http://techknack.net/python-urllib2-handlers/ +  	Andrew Rowls, the author of that code, agreed to release it to the  	public domain.  	""" @@ -200,7 +355,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  			return zlib.decompress(data, -zlib.MAX_WBITS)  		except zlib.error:  			return zlib.decompress(data) -	 +  	@staticmethod  	def addinfourl_wrapper(stream, headers, url, code):  		if hasattr(urllib2.addinfourl, 'getcode'): @@ -208,7 +363,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  		ret = urllib2.addinfourl(stream, headers, url)  		ret.code = code  		return ret -	 +  	def http_request(self, req):  		for h in std_headers:  			if h in req.headers: @@ -234,6 +389,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  			resp.msg = old_resp.msg  		return resp +  class FileDownloader(object):  	"""File Downloader class. @@ -282,10 +438,14 @@ class FileDownloader(object):  	noprogress:       Do not print the progress bar.  	playliststart:    Playlist item to start at.  	playlistend:      Playlist item to end at. +	matchtitle:       Download only matching titles. +	rejecttitle:      Reject downloads for matching titles.  	logtostderr:      Log messages to stderr instead of stdout.  	consoletitle:     Display progress in console window's titlebar.  	nopart:           Do not use temporary .part files.  	updatetime:       Use the Last-modified header to set output file timestamps. +	writedescription: Write the video description to a .description file +	writeinfojson:    Write the video description to a .info.json file  	"""  	params = None @@ -305,16 +465,6 @@ class FileDownloader(object):  		self.params = params  	@staticmethod -	def pmkdir(filename): -		"""Create directory components in filename. Similar to Unix "mkdir -p".""" -		components = filename.split(os.sep) -		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] -		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator -		for dir in aggregate: -			if not os.path.exists(dir): -				os.mkdir(dir) - -	@staticmethod  	def format_bytes(bytes):  		if bytes is None:  			return 'N/A' @@ -325,7 +475,7 @@ class FileDownloader(object):  		else:  			exponent = long(math.log(bytes, 1024.0))  		suffix = 'bkMGTPEZY'[exponent] -		converted = float(bytes) / float(1024**exponent) +		converted = float(bytes) / float(1024 ** exponent)  		return '%.2f%s' % (converted, suffix)  	@staticmethod @@ -463,7 +613,7 @@ class FileDownloader(object):  			os.rename(old_filename, new_filename)  		except (IOError, OSError), err:  			self.trouble(u'ERROR: unable to rename file') -	 +  	def try_utime(self, filename, last_modified_hdr):  		"""Try to set the last-modified time of the given file."""  		if last_modified_hdr is None: @@ -475,11 +625,20 @@ class FileDownloader(object):  			return  		filetime = timeconvert(timestr)  		if filetime is None: -			return +			return filetime  		try: -			os.utime(filename,(time.time(), filetime)) +			os.utime(filename, (time.time(), filetime))  		except:  			pass +		return filetime + +	def report_writedescription(self, descfn): +		""" Report that the description file is being written """ +		self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True) + +	def report_writeinfojson(self, infofn): +		""" Report that the metadata file has been written """ +		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)  	def report_destination(self, filename):  		"""Report destination filename.""" @@ -539,52 +698,101 @@ class FileDownloader(object):  	def process_info(self, info_dict):  		"""Process a single dictionary returned by an InfoExtractor."""  		filename = self.prepare_filename(info_dict) +		 +		# Forced printings +		if self.params.get('forcetitle', False): +			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') +		if self.params.get('forceurl', False): +			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') +		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: +			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') +		if self.params.get('forcedescription', False) and 'description' in info_dict: +			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') +		if self.params.get('forcefilename', False) and filename is not None: +			print filename.encode(preferredencoding(), 'xmlcharrefreplace') +		if self.params.get('forceformat', False): +			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') +  		# Do nothing else if in simulate mode  		if self.params.get('simulate', False): -			# Forced printings -			if self.params.get('forcetitle', False): -				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') -			if self.params.get('forceurl', False): -				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') -			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: -				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') -			if self.params.get('forcedescription', False) and 'description' in info_dict: -				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') -			if self.params.get('forcefilename', False) and filename is not None: -				print filename.encode(preferredencoding(), 'xmlcharrefreplace') -  			return  		if filename is None:  			return + +		matchtitle=self.params.get('matchtitle',False) +		rejecttitle=self.params.get('rejecttitle',False) +		title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') +		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): +			self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle)) +			return +		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): +			self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle)) +			return +			  		if self.params.get('nooverwrites', False) and os.path.exists(filename):  			self.to_stderr(u'WARNING: file exists and will be skipped')  			return  		try: -			self.pmkdir(filename) +			dn = os.path.dirname(filename) +			if dn != '' and not os.path.exists(dn): +				os.makedirs(dn)  		except (OSError, IOError), err: -			self.trouble(u'ERROR: unable to create directories: %s' % str(err)) +			self.trouble(u'ERROR: unable to create directory ' + unicode(err))  			return -		try: -			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) -		except (OSError, IOError), err: -			raise UnavailableVideoError -		except (urllib2.URLError, httplib.HTTPException, socket.error), err: -			self.trouble(u'ERROR: unable to download video data: %s' % str(err)) -			return -		except (ContentTooShortError, ), err: -			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) -			return +		if self.params.get('writedescription', False): +			try: +				descfn = filename + '.description' +				self.report_writedescription(descfn) +				descfile = open(descfn, 'wb') +				try: +					descfile.write(info_dict['description'].encode('utf-8')) +				finally: +					descfile.close() +			except (OSError, IOError): +				self.trouble(u'ERROR: Cannot write description file ' + descfn) +				return -		if success: +		if self.params.get('writeinfojson', False): +			infofn = filename + '.info.json' +			self.report_writeinfojson(infofn)  			try: -				self.post_process(filename, info_dict) -			except (PostProcessingError), err: -				self.trouble(u'ERROR: postprocessing: %s' % str(err)) +				json.dump +			except (NameError,AttributeError): +				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') +				return +			try: +				infof = open(infofn, 'wb') +				try: +					json.dump(info_dict, infof) +				finally: +					infof.close() +			except (OSError, IOError): +				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)  				return +		if not self.params.get('skip_download', False): +			try: +				success,add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) +				info_dict.update(add_data) +			except (OSError, IOError), err: +				raise UnavailableVideoError +			except (urllib2.URLError, httplib.HTTPException, socket.error), err: +				self.trouble(u'ERROR: unable to download video data: %s' % str(err)) +				return +			except (ContentTooShortError, ), err: +				self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) +				return +	 +			if success: +				try: +					self.post_process(filename, info_dict) +				except (PostProcessingError), err: +					self.trouble(u'ERROR: postprocessing: %s' % str(err)) +					return +  	def download(self, url_list):  		"""Download a given list of URLs."""  		if len(url_list) > 1 and self.fixed_template(): @@ -644,6 +852,11 @@ class FileDownloader(object):  			cursize = os.path.getsize(tmpfilename)  			if prevsize == cursize and retval == 1:  				break +			 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those +			if prevsize == cursize and retval == 2 and cursize > 1024: +				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') +				retval = 0 +				break  		if retval == 0:  			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))  			self.try_rename(tmpfilename, filename) @@ -680,7 +893,7 @@ class FileDownloader(object):  		# Request parameters in case of being able to resume  		if self.params.get('continuedl', False) and resume_len != 0:  			self.report_resuming_byte(resume_len) -			request.add_header('Range','bytes=%d-' % resume_len) +			request.add_header('Range', 'bytes=%d-' % resume_len)  			open_mode = 'ab'  		count = 0 @@ -706,7 +919,7 @@ class FileDownloader(object):  					else:  						# Examine the reported length  						if (content_length is not None and -						    (resume_len - 100 < long(content_length) < resume_len + 100)): +								(resume_len - 100 < long(content_length) < resume_len + 100)):  							# The file had already been fully downloaded.  							# Explanation to the above condition: in issue #175 it was revealed that  							# YouTube sometimes adds or removes a few bytes from the end of the file, @@ -751,6 +964,7 @@ class FileDownloader(object):  			if stream is None:  				try:  					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) +					assert stream is not None  					filename = self.undo_temp_name(tmpfilename)  					self.report_destination(filename)  				except (OSError, IOError), err: @@ -772,6 +986,9 @@ class FileDownloader(object):  			# Apply rate limit  			self.slow_down(start, byte_counter - resume_len) +		if stream is None: +			self.trouble(u'\nERROR: Did not get any data blocks') +			return False  		stream.close()  		self.report_finish()  		if data_len is not None and byte_counter != data_len: @@ -779,10 +996,12 @@ class FileDownloader(object):  		self.try_rename(tmpfilename, filename)  		# Update file modification time +		filetime = None  		if self.params.get('updatetime', True): -			self.try_utime(filename, data.info().get('last-modified', None)) +			filetime = self.try_utime(filename, data.info().get('last-modified', None)) + +		return True, {'filetime': filetime} -		return True  class InfoExtractor(object):  	"""Information Extractor class. @@ -814,9 +1033,8 @@ class InfoExtractor(object):  	description:	One-line video description.  	Subclasses of this one should re-define the _real_initialize() and -	_real_extract() methods, as well as the suitable() static method. -	Probably, they should also be instantiated and added to the main -	downloader. +	_real_extract() methods and define a _VALID_URL regexp. +	Probably, they should also be added to the list of extractors.  	"""  	_ready = False @@ -827,10 +1045,9 @@ class InfoExtractor(object):  		self._ready = False  		self.set_downloader(downloader) -	@staticmethod -	def suitable(url): +	def suitable(self, url):  		"""Receives a URL and returns True if suitable for this IE.""" -		return False +		return re.match(self._VALID_URL, url) is not None  	def initialize(self):  		"""Initializes an instance (authentication, etc).""" @@ -855,16 +1072,17 @@ class InfoExtractor(object):  		"""Real extraction process. Redefine in subclasses."""  		pass +  class YoutubeIE(InfoExtractor):  	"""Information extractor for youtube.com.""" -	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' +	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'  	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'  	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'  	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'  	_NETRC_MACHINE = 'youtube'  	# Listed in order of quality -	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] +	_available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']  	_video_extensions = {  		'13': '3gp',  		'17': 'mp4', @@ -875,10 +1093,7 @@ class YoutubeIE(InfoExtractor):  		'43': 'webm',  		'45': 'webm',  	} - -	@staticmethod -	def suitable(url): -		return (re.match(YoutubeIE._VALID_URL, url) is not None) +	IE_NAME = u'youtube'  	def report_lang(self):  		"""Report attempt to set language.""" @@ -1009,7 +1224,7 @@ class YoutubeIE(InfoExtractor):  		self.report_video_info_webpage_download(video_id)  		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:  			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' -					   % (video_id, el_type)) +					% (video_id, el_type))  			request = urllib2.Request(video_info_url)  			try:  				video_info_webpage = urllib2.urlopen(request).read() @@ -1067,11 +1282,19 @@ class YoutubeIE(InfoExtractor):  					pass  		# description -		video_description = 'No description available.' -		if self._downloader.params.get('forcedescription', False): -			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage) -			if mobj is not None: -				video_description = mobj.group(1) +		try: +			lxml.etree +		except NameError: +			video_description = u'No description available.' +			if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): +				mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage) +				if mobj is not None: +					video_description = mobj.group(1).decode('utf-8') +		else: +			html_parser = lxml.etree.HTMLParser(encoding='utf-8') +			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) +			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) +			# TODO use another parser  		# token  		video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1079,8 +1302,15 @@ class YoutubeIE(InfoExtractor):  		# Decide which formats to download  		req_format = self._downloader.params.get('format', None) -		if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: -			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) +		if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): +			self.report_rtmp_download() +			video_url_list = [(None, video_info['conn'][0])] +		elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: +			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') +			url_data = [parse_qs(uds) for uds in url_data_strs] +			url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) +			url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) +  			format_limit = self._downloader.params.get('format_limit', None)  			if format_limit is not None and format_limit in self._available_formats:  				format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -1092,6 +1322,8 @@ class YoutubeIE(InfoExtractor):  				return  			if req_format is None:  				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality +			elif req_format == 'worst': +				video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality  			elif req_format == '-1':  				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats  			else: @@ -1100,13 +1332,8 @@ class YoutubeIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: requested format not available')  					return  				video_url_list = [(req_format, url_map[req_format])] # Specific format - -		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): -			self.report_rtmp_download() -			video_url_list = [(None, video_info['conn'][0])] -  		else: -			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') +			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')  			return  		for format_param, video_real_url in video_url_list: @@ -1116,7 +1343,6 @@ class YoutubeIE(InfoExtractor):  			# Extension  			video_extension = self._video_extensions.get(format_param, 'flv') -			# Find the video URL in fmt_url_map or conn paramters  			try:  				# Process video information  				self._downloader.process_info({ @@ -1129,7 +1355,7 @@ class YoutubeIE(InfoExtractor):  					'ext':		video_extension.decode('utf-8'),  					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),  					'thumbnail':	video_thumbnail.decode('utf-8'), -					'description':	video_description.decode('utf-8'), +					'description':	video_description,  					'player_url':	player_url,  				})  			except UnavailableVideoError, err: @@ -1143,15 +1369,12 @@ class MetacafeIE(InfoExtractor):  	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'  	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'  	_youtube_ie = None +	IE_NAME = u'metacafe'  	def __init__(self, youtube_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._youtube_ie = youtube_ie -	@staticmethod -	def suitable(url): -		return (re.match(MetacafeIE._VALID_URL, url) is not None) -  	def report_disclaimer(self):  		"""Report disclaimer retrieval."""  		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer') @@ -1285,14 +1508,11 @@ class DailymotionIE(InfoExtractor):  	"""Information Extractor for Dailymotion"""  	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' +	IE_NAME = u'dailymotion'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(DailymotionIE._VALID_URL, url) is not None) -  	def report_download_webpage(self, video_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) @@ -1320,6 +1540,7 @@ class DailymotionIE(InfoExtractor):  		# Retrieve video webpage to extract further information  		request = urllib2.Request(url) +		request.add_header('Cookie', 'family_filter=off')  		try:  			self.report_download_webpage(video_id)  			webpage = urllib2.urlopen(request).read() @@ -1329,25 +1550,29 @@ class DailymotionIE(InfoExtractor):  		# Extract URL, uploader and title from webpage  		self.report_extraction(video_id) -		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) +		mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)  		if mobj is None:  			self._downloader.trouble(u'ERROR: unable to extract media URL')  			return -		mediaURL = urllib.unquote(mobj.group(1)) +		sequence = urllib.unquote(mobj.group(1)) +		mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract media URL') +			return +		mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')  		# if needed add http://www.dailymotion.com/ if relative URL  		video_url = mediaURL -		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>' -		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage) +		mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)  		if mobj is None:  			self._downloader.trouble(u'ERROR: unable to extract title')  			return  		video_title = mobj.group(1).decode('utf-8')  		video_title = sanitize_title(video_title) -		mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage) +		mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)  		if mobj is None:  			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')  			return @@ -1369,18 +1594,16 @@ class DailymotionIE(InfoExtractor):  		except UnavailableVideoError:  			self._downloader.trouble(u'\nERROR: unable to download video') +  class GoogleIE(InfoExtractor):  	"""Information extractor for video.google.com."""  	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' +	IE_NAME = u'video.google'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(GoogleIE._VALID_URL, url) is not None) -  	def report_download_webpage(self, video_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id) @@ -1462,7 +1685,6 @@ class GoogleIE(InfoExtractor):  		else:	# we need something to pass to process_info  			video_thumbnail = '' -  		try:  			# Process video information  			self._downloader.process_info({ @@ -1484,14 +1706,11 @@ class PhotobucketIE(InfoExtractor):  	"""Information extractor for photobucket.com."""  	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' +	IE_NAME = u'photobucket'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(PhotobucketIE._VALID_URL, url) is not None) -  	def report_download_webpage(self, video_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id) @@ -1569,14 +1788,11 @@ class YahooIE(InfoExtractor):  	# _VPAGE_URL matches only the extractable '/watch/' URLs  	_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'  	_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' +	IE_NAME = u'video.yahoo'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(YahooIE._VALID_URL, url) is not None) -  	def report_download_webpage(self, video_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id) @@ -1662,7 +1878,8 @@ class YahooIE(InfoExtractor):  			self._downloader.trouble(u'ERROR: unable to extract video description')  			return  		video_description = mobj.group(1).decode('utf-8') -		if not video_description: video_description = 'No description available.' +		if not video_description: +			video_description = 'No description available.'  		# Extract video height and width  		mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage) @@ -1683,8 +1900,8 @@ class YahooIE(InfoExtractor):  		yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents  		yv_bitrate = '700'  # according to Wikipedia this is hard-coded  		request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + -				          '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + -					  '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') +				'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + +				'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')  		try:  			self.report_download_webpage(video_id)  			webpage = urllib2.urlopen(request).read() @@ -1713,23 +1930,134 @@ class YahooIE(InfoExtractor):  				'thumbnail':	video_thumbnail.decode('utf-8'),  				'description':	video_description,  				'thumbnail':	video_thumbnail, -				'description':	video_description,  				'player_url':	None,  			})  		except UnavailableVideoError:  			self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): +	"""Information extractor for vimeo.com.""" + +	# _VALID_URL matches Vimeo URLs +	_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' +	IE_NAME = u'vimeo' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) + +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self, url, new_video=True): +		# Extract ID from URL +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) +			return + +		# At this point we have a new video +		self._downloader.increment_downloads() +		video_id = mobj.group(1) + +		# Retrieve video webpage to extract further information +		request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		# Now we begin extracting as much information as we can from what we +		# retrieved. First we extract the information common to all extractors, +		# and latter we extract those that are Vimeo specific. +		self.report_extraction(video_id) + +		# Extract title +		mobj = re.search(r'<caption>(.*?)</caption>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video title') +			return +		video_title = mobj.group(1).decode('utf-8') +		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + +		# Extract uploader +		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video uploader') +			return +		video_uploader = mobj.group(1).decode('utf-8') + +		# Extract video thumbnail +		mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video thumbnail') +			return +		video_thumbnail = mobj.group(1).decode('utf-8') + +		# # Extract video description +		# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) +		# if mobj is None: +		# 	self._downloader.trouble(u'ERROR: unable to extract video description') +		# 	return +		# video_description = mobj.group(1).decode('utf-8') +		# if not video_description: video_description = 'No description available.' +		video_description = 'Foo.' + +		# Vimeo specific: extract request signature +		mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract request signature') +			return +		sig = mobj.group(1).decode('utf-8') + +		# Vimeo specific: Extract request signature expiration +		mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract request signature expiration') +			return +		sig_exp = mobj.group(1).decode('utf-8') + +		video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp) + +		try: +			# Process video information +			self._downloader.process_info({ +				'id':		video_id.decode('utf-8'), +				'url':		video_url, +				'uploader':	video_uploader, +				'upload_date':	u'NA', +				'title':	video_title, +				'stitle':	simple_title, +				'ext':		u'mp4', +				'thumbnail':	video_thumbnail.decode('utf-8'), +				'description':	video_description, +				'thumbnail':	video_thumbnail, +				'description':	video_description, +				'player_url':	None, +			}) +		except UnavailableVideoError: +			self._downloader.trouble(u'ERROR: unable to download video') + +  class GenericIE(InfoExtractor):  	"""Generic last-resort information extractor.""" +	_VALID_URL = r'.*' +	IE_NAME = u'generic' +  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return True -  	def report_download_webpage(self, video_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.') @@ -1777,11 +2105,11 @@ class GenericIE(InfoExtractor):  			return  		video_url = urllib.unquote(mobj.group(1)) -		video_id  = os.path.basename(video_url) +		video_id = os.path.basename(video_url)  		# here's a fun little line of code for you:  		video_extension = os.path.splitext(video_id)[1][1:] -		video_id        = os.path.splitext(video_id)[0] +		video_id = os.path.splitext(video_id)[0]  		# it's tempting to parse this further, but you would  		# have to take into account all the variations like @@ -1823,21 +2151,18 @@ class GenericIE(InfoExtractor):  class YoutubeSearchIE(InfoExtractor):  	"""Information Extractor for YouTube search queries.""" -	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' +	_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'  	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'  	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'  	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'  	_youtube_ie = None  	_max_youtube_results = 1000 +	IE_NAME = u'youtube:search'  	def __init__(self, youtube_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._youtube_ie = youtube_ie -	@staticmethod -	def suitable(url): -		return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) -  	def report_download_page(self, query, pagenum):  		"""Report attempt to download playlist page with given number."""  		query = query.decode(preferredencoding()) @@ -1847,14 +2172,14 @@ class YoutubeSearchIE(InfoExtractor):  		self._youtube_ie.initialize()  	def _real_extract(self, query): -		mobj = re.match(self._VALID_QUERY, query) +		mobj = re.match(self._VALID_URL, query)  		if mobj is None:  			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)  			return  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -1868,7 +2193,7 @@ class YoutubeSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_youtube_results: -					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n)) +					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))  					n = self._max_youtube_results  				self._download_n_results(query, n)  				return @@ -1912,23 +2237,21 @@ class YoutubeSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class GoogleSearchIE(InfoExtractor):  	"""Information Extractor for Google Video search queries.""" -	_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' +	_VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'  	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'  	_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'  	_MORE_PAGES_INDICATOR = r'<span>Next</span>'  	_google_ie = None  	_max_google_results = 1000 +	IE_NAME = u'video.google:search'  	def __init__(self, google_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._google_ie = google_ie -	@staticmethod -	def suitable(url): -		return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) -  	def report_download_page(self, query, pagenum):  		"""Report attempt to download playlist page with given number."""  		query = query.decode(preferredencoding()) @@ -1938,14 +2261,14 @@ class GoogleSearchIE(InfoExtractor):  		self._google_ie.initialize()  	def _real_extract(self, query): -		mobj = re.match(self._VALID_QUERY, query) +		mobj = re.match(self._VALID_URL, query)  		if mobj is None:  			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)  			return  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -1959,7 +2282,7 @@ class GoogleSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_google_results: -					self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n)) +					self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))  					n = self._max_google_results  				self._download_n_results(query, n)  				return @@ -2003,23 +2326,21 @@ class GoogleSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class YahooSearchIE(InfoExtractor):  	"""Information Extractor for Yahoo! Video search queries.""" -	_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' +	_VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'  	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'  	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'  	_MORE_PAGES_INDICATOR = r'\s*Next'  	_yahoo_ie = None  	_max_yahoo_results = 1000 +	IE_NAME = u'video.yahoo:search'  	def __init__(self, yahoo_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._yahoo_ie = yahoo_ie -	@staticmethod -	def suitable(url): -		return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) -  	def report_download_page(self, query, pagenum):  		"""Report attempt to download playlist page with given number."""  		query = query.decode(preferredencoding()) @@ -2029,14 +2350,14 @@ class YahooSearchIE(InfoExtractor):  		self._yahoo_ie.initialize()  	def _real_extract(self, query): -		mobj = re.match(self._VALID_QUERY, query) +		mobj = re.match(self._VALID_URL, query)  		if mobj is None:  			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)  			return  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -2050,7 +2371,7 @@ class YahooSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_yahoo_results: -					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n)) +					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))  					n = self._max_yahoo_results  				self._download_n_results(query, n)  				return @@ -2094,23 +2415,21 @@ class YahooSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class YoutubePlaylistIE(InfoExtractor):  	"""Information Extractor for YouTube playlists.""" -	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' +	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'  	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'  	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'  	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'  	_youtube_ie = None +	IE_NAME = u'youtube:playlist'  	def __init__(self, youtube_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._youtube_ie = youtube_ie -	@staticmethod -	def suitable(url): -		return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) -  	def report_download_page(self, playlist_id, pagenum):  		"""Report attempt to download playlist page with given number."""  		self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) @@ -2170,6 +2489,7 @@ class YoutubePlaylistIE(InfoExtractor):  			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)  		return +  class YoutubeUserIE(InfoExtractor):  	"""Information Extractor for YouTube users.""" @@ -2179,19 +2499,16 @@ class YoutubeUserIE(InfoExtractor):  	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'  	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'  	_youtube_ie = None +	IE_NAME = u'youtube:user'  	def __init__(self, youtube_ie, downloader=None):  		InfoExtractor.__init__(self, downloader)  		self._youtube_ie = youtube_ie -	@staticmethod -	def suitable(url): -		return (re.match(YoutubeUserIE._VALID_URL, url) is not None) -  	def report_download_page(self, username, start_index):  		"""Report attempt to download user page."""  		self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % -				           (username, start_index, start_index + self._GDATA_PAGE_SIZE)) +				(username, start_index, start_index + self._GDATA_PAGE_SIZE))  	def _real_initialize(self):  		self._youtube_ie.initialize() @@ -2253,9 +2570,9 @@ class YoutubeUserIE(InfoExtractor):  			video_ids = video_ids[playliststart:]  		else:  			video_ids = video_ids[playliststart:playlistend] -			 +  		self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % -				           (username, all_ids_count, len(video_ids))) +				(username, all_ids_count, len(video_ids)))  		for video_id in video_ids:  			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2265,14 +2582,11 @@ class DepositFilesIE(InfoExtractor):  	"""Information extractor for depositfiles.com"""  	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' +	IE_NAME = u'DepositFiles'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(DepositFilesIE._VALID_URL, url) is not None) -  	def report_download_webpage(self, file_id):  		"""Report webpage download."""  		self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) @@ -2340,6 +2654,7 @@ class DepositFilesIE(InfoExtractor):  		except UnavailableVideoError, err:  			self._downloader.trouble(u'ERROR: unable to download file') +  class FacebookIE(InfoExtractor):  	"""Information Extractor for Facebook""" @@ -2351,14 +2666,11 @@ class FacebookIE(InfoExtractor):  		'highqual': 'mp4',  		'lowqual': 'mp4',  	} +	IE_NAME = u'facebook'  	def __init__(self, downloader=None):  		InfoExtractor.__init__(self, downloader) -	@staticmethod -	def suitable(url): -		return (re.match(FacebookIE._VALID_URL, url) is not None) -  	def _reporter(self, message):  		"""Add header and report message."""  		self._downloader.to_screen(u'[facebook] %s' % message) @@ -2506,10 +2818,7 @@ class FacebookIE(InfoExtractor):  					pass  		# description -		video_description = 'No description available.' -		if (self._downloader.params.get('forcedescription', False) and -		    'description' in video_info): -			video_description = video_info['description'] +		video_description = video_info.get('description', 'No description available.')  		url_map = video_info['video_urls']  		if len(url_map.keys()) > 0: @@ -2527,6 +2836,8 @@ class FacebookIE(InfoExtractor):  				return  			if req_format is None:  				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality +			elif req_format == 'worst': +				video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality  			elif req_format == '-1':  				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats  			else: @@ -2544,7 +2855,6 @@ class FacebookIE(InfoExtractor):  			# Extension  			video_extension = self._video_extensions.get(format_param, 'mp4') -			# Find the video URL in fmt_url_map or conn paramters  			try:  				# Process video information  				self._downloader.process_info({ @@ -2563,6 +2873,377 @@ class FacebookIE(InfoExtractor):  			except UnavailableVideoError, err:  				self._downloader.trouble(u'\nERROR: unable to download video') +class BlipTVIE(InfoExtractor): +	"""Information extractor for blip.tv""" + +	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' +	_URL_EXT = r'^.*\.([a-z0-9]+)$' +	IE_NAME = u'blip.tv' + +	def report_extraction(self, file_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) + +	def _simplify_title(self, title): +		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) +		res = res.strip(ur'_') +		return res + +	def _real_extract(self, url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		if '?' in url: +			cchar = '&' +		else: +			cchar = '?' +		json_url = url + cchar + 'skin=json&version=2&no_wrap=1' +		request = urllib2.Request(json_url) +		self.report_extraction(mobj.group(1)) +		try: +			json_code = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) +			return +		try: +			json_data = json.loads(json_code) +			if 'Post' in json_data: +				data = json_data['Post'] +			else: +				data = json_data + +			upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') +			video_url = data['media']['url'] +			umobj = re.match(self._URL_EXT, video_url) +			if umobj is None: +				raise ValueError('Can not determine filename extension') +			ext = umobj.group(1) + +			self._downloader.increment_downloads() + +			info = { +				'id': data['item_id'], +				'url': video_url, +				'uploader': data['display_name'], +				'upload_date': upload_date, +				'title': data['title'], +				'stitle': self._simplify_title(data['title']), +				'ext': ext, +				'format': data['media']['mimeType'], +				'thumbnail': data['thumbnailUrl'], +				'description': data['description'], +				'player_url': data['embedUrl'] +			} +		except (ValueError,KeyError), err: +			self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) +			return + +		try: +			self._downloader.process_info(info) +		except UnavailableVideoError, err: +			self._downloader.trouble(u'\nERROR: unable to download video') + + +class MyVideoIE(InfoExtractor): +	"""Information Extractor for myvideo.de.""" + +	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' +	IE_NAME = u'myvideo' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) +	 +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self,url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._download.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		video_id = mobj.group(1) +		simple_title = mobj.group(2).decode('utf-8') +		# should actually not be necessary +		simple_title = sanitize_title(simple_title) +		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) + +		# Get video webpage +		request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		self.report_extraction(video_id) +		mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />', +				 webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract media URL') +			return +		video_url = mobj.group(1) + ('/%s.flv' % video_id) + +		mobj = re.search('<title>([^<]+)</title>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract title') +			return + +		video_title = mobj.group(1) +		video_title = sanitize_title(video_title) + +		try: +			print(video_url) +			self._downloader.process_info({ +				'id':		video_id, +				'url':		video_url, +				'uploader':	u'NA', +				'upload_date':  u'NA', +				'title':	video_title, +				'stitle':	simple_title, +				'ext':		u'flv', +				'format':	u'NA', +				'player_url':	None, +			}) +		except UnavailableVideoError: +			self._downloader.trouble(u'\nERROR: Unable to download video') + +class ComedyCentralIE(InfoExtractor): +	"""Information extractor for The Daily Show and Colbert Report """ + +	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$' +	IE_NAME = u'comedycentral' + +	def report_extraction(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) +	 +	def report_config_download(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + +	def report_index_download(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) + +	def report_player_url(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + +	def _simplify_title(self, title): +		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) +		res = res.strip(ur'_') +		return res + +	def _real_extract(self, url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		if mobj.group('shortname'): +			if mobj.group('shortname') in ('tds', 'thedailyshow'): +				url = 'http://www.thedailyshow.com/full-episodes/' +			else: +				url = 'http://www.colbertnation.com/full-episodes/' +			mobj = re.match(self._VALID_URL, url) +			assert mobj is not None + +		dlNewest = not mobj.group('episode') +		if dlNewest: +			epTitle = mobj.group('showname') +		else: +			epTitle = mobj.group('episode') + +		req = urllib2.Request(url) +		self.report_extraction(epTitle) +		try: +			htmlHandle = urllib2.urlopen(req) +			html = htmlHandle.read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) +			return +		if dlNewest: +			url = htmlHandle.geturl() +			mobj = re.match(self._VALID_URL, url) +			if mobj is None: +				self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) +				return +			if mobj.group('episode') == '': +				self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) +				return +			epTitle = mobj.group('episode') + +		mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html) +		if len(mMovieParams) == 0: +			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) +			return + +		playerUrl_raw = mMovieParams[0][0] +		self.report_player_url(epTitle) +		try: +			urlHandle = urllib2.urlopen(playerUrl_raw) +			playerUrl = urlHandle.geturl() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) +			return + +		uri = mMovieParams[0][1] +		indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri}) +		self.report_index_download(epTitle) +		try: +			indexXml = urllib2.urlopen(indexUrl).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) +			return + +		idoc = xml.etree.ElementTree.fromstring(indexXml) +		itemEls = idoc.findall('.//item') +		for itemEl in itemEls: +			mediaId = itemEl.findall('./guid')[0].text +			shortMediaId = mediaId.split(':')[-1] +			showId = mediaId.split(':')[-2].replace('.com', '') +			officialTitle = itemEl.findall('./title')[0].text +			officialDate = itemEl.findall('./pubDate')[0].text + +			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + +						urllib.urlencode({'uri': mediaId})) +			configReq = urllib2.Request(configUrl) +			self.report_config_download(epTitle) +			try: +				configXml = urllib2.urlopen(configReq).read() +			except (urllib2.URLError, httplib.HTTPException, socket.error), err: +				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) +				return + +			cdoc = xml.etree.ElementTree.fromstring(configXml) +			turls = [] +			for rendition in cdoc.findall('.//rendition'): +				finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) +				turls.append(finfo) + +			if len(turls) == 0: +				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') +				continue + +			# For now, just pick the highest bitrate +			format,video_url = turls[-1] + +			self._downloader.increment_downloads() + +			effTitle = showId + '-' + epTitle +			info = { +				'id': shortMediaId, +				'url': video_url, +				'uploader': showId, +				'upload_date': officialDate, +				'title': effTitle, +				'stitle': self._simplify_title(effTitle), +				'ext': 'mp4', +				'format': format, +				'thumbnail': None, +				'description': officialTitle, +				'player_url': playerUrl +			} + +			try: +				self._downloader.process_info(info) +			except UnavailableVideoError, err: +				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId) +				continue + + +class EscapistIE(InfoExtractor): +	"""Information extractor for The Escapist """ + +	_VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$' +	IE_NAME = u'escapist' + +	def report_extraction(self, showName): +		self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) + +	def report_config_download(self, showName): +		self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) + +	def _simplify_title(self, title): +		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) +		res = res.strip(ur'_') +		return res + +	def _real_extract(self, url): +		htmlParser = HTMLParser.HTMLParser() + +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid URL: %s' % url) +			return +		showName = mobj.group('showname') +		videoId = mobj.group('episode') + +		self.report_extraction(showName) +		try: +			webPage = urllib2.urlopen(url).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) +			return + +		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage) +		description = htmlParser.unescape(descMatch.group(1)) +		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage) +		imgUrl = htmlParser.unescape(imgMatch.group(1)) +		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage) +		playerUrl = htmlParser.unescape(playerUrlMatch.group(1)) +		configUrlMatch = re.search('config=(.*)$', playerUrl) +		configUrl = urllib2.unquote(configUrlMatch.group(1)) + +		self.report_config_download(showName) +		try: +			configJSON = urllib2.urlopen(configUrl).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err)) +			return + +		# Technically, it's JavaScript, not JSON +		configJSON = configJSON.replace("'", '"') + +		try: +			config = json.loads(configJSON) +		except (ValueError,), err: +			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err)) +			return + +		playlist = config['playlist'] +		videoUrl = playlist[1]['url'] + +		self._downloader.increment_downloads() +		info = { +			'id': videoId, +			'url': videoUrl, +			'uploader': showName, +			'upload_date': None, +			'title': showName, +			'stitle': self._simplify_title(showName), +			'ext': 'flv', +			'format': 'flv', +			'thumbnail': imgUrl, +			'description': description, +			'player_url': playerUrl, +		} + +		try: +			self._downloader.process_info(info) +		except UnavailableVideoError, err: +			self._downloader.trouble(u'\nERROR: unable to download ' + videoId) + + +  class PostProcessor(object):  	"""Post Processor class. @@ -2609,16 +3290,13 @@ class PostProcessor(object):  		"""  		return information # by default, do nothing +  class FFmpegExtractAudioPP(PostProcessor): -	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=None): +	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):  		PostProcessor.__init__(self, downloader)  		if preferredcodec is None:  			preferredcodec = 'best' -		if preferredquality is None: -			preferredquality = '128K' -		if keepvideo is None: -			keepvideo = False;  		self._preferredcodec = preferredcodec  		self._preferredquality = preferredquality  		self._keepvideo = keepvideo @@ -2659,8 +3337,6 @@ class FFmpegExtractAudioPP(PostProcessor):  			return None  		more_opts = [] -		if (self._preferredquality != '128K') and (self._preferredquality != '160K') and (self._preferredquality != '192K'): -			self._preferredquality = '128K'  		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:  			if filecodec == 'aac' or filecodec == 'mp3':  				# Lossless if possible @@ -2672,12 +3348,16 @@ class FFmpegExtractAudioPP(PostProcessor):  				# MP3 otherwise.  				acodec = 'libmp3lame'  				extension = 'mp3' -				more_opts = ['-ab', self._preferredquality] +				more_opts = [] +				if self._preferredquality is not None: +					more_opts += ['-ab', self._preferredquality]  		else:  			# We convert the audio (lossy)  			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]  			extension = self._preferredcodec -			more_opts = ['-ab', self._preferredquality] +			more_opts = [] +			if self._preferredquality is not None: +				more_opts += ['-ab', self._preferredquality]  			if self._preferredcodec == 'aac':  				more_opts += ['-f', 'adts'] @@ -2690,6 +3370,13 @@ class FFmpegExtractAudioPP(PostProcessor):  			self._downloader.to_stderr(u'WARNING: error running ffmpeg')  			return None + 		# Try to update the date time for extracted audio file. +		if information.get('filetime') is not None: +			try: +				os.utime(new_path, (time.time(), information['filetime'])) +			except: +				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') +  		if not self._keepvideo:  			try:  				os.remove(path) @@ -2700,315 +3387,415 @@ class FFmpegExtractAudioPP(PostProcessor):  		information['filepath'] = new_path  		return information -### MAIN PROGRAM ### -if __name__ == '__main__': + +def updateSelf(downloader, filename): +	''' Update the program file with the latest version from the repository ''' +	# Note: downloader only used for options +	if not os.access(filename, os.W_OK): +		sys.exit('ERROR: no write permissions on %s' % filename) + +	downloader.to_screen('Updating to latest version...') +  	try: -		# Modules needed only when running the main program -		import getpass -		import optparse +		try: +			urlh = urllib.urlopen(UPDATE_URL) +			newcontent = urlh.read() +		finally: +			urlh.close() +	except (IOError, OSError), err: +		sys.exit('ERROR: unable to download latest version') -		# Function to update the program file with the latest version from the repository. -		def update_self(downloader, filename): -			# Note: downloader only used for options -			if not os.access(filename, os.W_OK): -				sys.exit('ERROR: no write permissions on %s' % filename) +	try: +		outf = open(filename, 'wb') +		try: +			outf.write(newcontent) +		finally: +			outf.close() +	except (IOError, OSError), err: +		sys.exit('ERROR: unable to overwrite current version') -			downloader.to_screen('Updating to latest stable version...') -			try: -				latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' -				latest_version = urllib.urlopen(latest_url).read().strip() -				prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version -				newcontent = urllib.urlopen(prog_url).read() -			except (IOError, OSError), err: -				sys.exit('ERROR: unable to download latest version') -			try: -				stream = open(filename, 'w') -				stream.write(newcontent) -				stream.close() -			except (IOError, OSError), err: -				sys.exit('ERROR: unable to overwrite current version') -			downloader.to_screen('Updated to version %s' % latest_version) - -		# Parse command line -		parser = optparse.OptionParser( -			usage='Usage: %prog [options] url...', -			version='2011.03.29', -			conflict_handler='resolve', -		) - -		parser.add_option('-h', '--help', -				action='help', help='print this help text and exit') -		parser.add_option('-v', '--version', -				action='version', help='print program version and exit') -		parser.add_option('-U', '--update', -				action='store_true', dest='update_self', help='update this program to latest stable version') -		parser.add_option('-i', '--ignore-errors', -				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) -		parser.add_option('-r', '--rate-limit', -				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') -		parser.add_option('-R', '--retries', -				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) -		parser.add_option('--playlist-start', -				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) -		parser.add_option('--playlist-end', -				dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) -		parser.add_option('--dump-user-agent', -				action='store_true', dest='dump_user_agent', -				help='display the current browser identification', default=False) - -		authentication = optparse.OptionGroup(parser, 'Authentication Options') -		authentication.add_option('-u', '--username', -				dest='username', metavar='USERNAME', help='account username') -		authentication.add_option('-p', '--password', -				dest='password', metavar='PASSWORD', help='account password') -		authentication.add_option('-n', '--netrc', -				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) -		parser.add_option_group(authentication) - -		video_format = optparse.OptionGroup(parser, 'Video Format Options') -		video_format.add_option('-f', '--format', -				action='store', dest='format', metavar='FORMAT', help='video format code') -		video_format.add_option('--all-formats', -				action='store_const', dest='format', help='download all available video formats', const='-1') -		video_format.add_option('--max-quality', -				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') -		parser.add_option_group(video_format) - -		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') -		verbosity.add_option('-q', '--quiet', -				action='store_true', dest='quiet', help='activates quiet mode', default=False) -		verbosity.add_option('-s', '--simulate', -				action='store_true', dest='simulate', help='do not download video', default=False) -		verbosity.add_option('-g', '--get-url', -				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) -		verbosity.add_option('-e', '--get-title', -				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) -		verbosity.add_option('--get-thumbnail', -				action='store_true', dest='getthumbnail', -				help='simulate, quiet but print thumbnail URL', default=False) -		verbosity.add_option('--get-description', -				action='store_true', dest='getdescription', -				help='simulate, quiet but print video description', default=False) -		verbosity.add_option('--get-filename', -				action='store_true', dest='getfilename', -				help='simulate, quiet but print output filename', default=False) -		verbosity.add_option('--no-progress', -				action='store_true', dest='noprogress', help='do not print progress bar', default=False) -		verbosity.add_option('--console-title', -				action='store_true', dest='consoletitle', -				help='display progress in console titlebar', default=False) -		parser.add_option_group(verbosity) - -		filesystem = optparse.OptionGroup(parser, 'Filesystem Options') -		filesystem.add_option('-t', '--title', -				action='store_true', dest='usetitle', help='use title in file name', default=False) -		filesystem.add_option('-l', '--literal', -				action='store_true', dest='useliteral', help='use literal title in file name', default=False) -		filesystem.add_option('-A', '--auto-number', -				action='store_true', dest='autonumber', -				help='number downloaded files starting from 00000', default=False) -		filesystem.add_option('-o', '--output', -				dest='outtmpl', metavar='TEMPLATE', help='output filename template') -		filesystem.add_option('-a', '--batch-file', -				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') -		filesystem.add_option('-w', '--no-overwrites', -				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) -		filesystem.add_option('-c', '--continue', -				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) -		filesystem.add_option('--cookies', -				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') -		filesystem.add_option('--no-part', -				action='store_true', dest='nopart', help='do not use .part files', default=False) -		filesystem.add_option('--no-mtime', -				action='store_false', dest='updatetime', -				help='do not use the Last-modified header to set the file modification time', default=True) -		parser.add_option_group(filesystem) - -		postproc = optparse.OptionGroup(parser, 'Post-processing Options') -		postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, -				help='convert video files to audio-only files (requires ffmpeg and ffprobe)') -		postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', -				help='"best", "aac" or "mp3"; best by default') -		postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', -				help='128K, 160K or 192K; 128K by default') -		postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, -				help='keeps the video file on disk after the post-processing; the video is erased by default') -		parser.add_option_group(postproc) - -		(opts, args) = parser.parse_args() - -		# Open appropriate CookieJar -		if opts.cookiefile is None: -			jar = cookielib.CookieJar() -		else: -			try: -				jar = cookielib.MozillaCookieJar(opts.cookiefile) -				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): -					jar.load() -			except (IOError, OSError), err: -				sys.exit(u'ERROR: unable to open cookie file') +	downloader.to_screen('Updated youtube-dl. Restart to use the new version.') -		# Dump user agent -		if opts.dump_user_agent: -			print std_headers['User-Agent'] -			sys.exit(0) +def parseOpts(): +	# Deferred imports +	import getpass +	import optparse -		# General configuration -		cookie_processor = urllib2.HTTPCookieProcessor(jar) -		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) -		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) +	def _format_option_string(option): +		''' ('-o', '--option') -> -o, --format METAVAR''' + +		opts = [] + +		if option._short_opts: opts.append(option._short_opts[0]) +		if option._long_opts: opts.append(option._long_opts[0]) +		if len(opts) > 1: opts.insert(1, ', ') + +		if option.takes_value(): opts.append(' %s' % option.metavar) + +		return "".join(opts) + +	def _find_term_columns(): +		columns = os.environ.get('COLUMNS', None) +		if columns: +			return int(columns) -		# Batch file verification -		batchurls = [] -		if opts.batchfile is not None: -			try: -				if opts.batchfile == '-': -					batchfd = sys.stdin -				else: -					batchfd = open(opts.batchfile, 'r') -				batchurls = batchfd.readlines() -				batchurls = [x.strip() for x in batchurls] -				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] -			except IOError: -				sys.exit(u'ERROR: batch file could not be read') -		all_urls = batchurls + args - -		# Conflicting, missing and erroneous options -		if opts.usenetrc and (opts.username is not None or opts.password is not None): -			parser.error(u'using .netrc conflicts with giving username/password') -		if opts.password is not None and opts.username is None: -			parser.error(u'account username missing') -		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): -			parser.error(u'using output template conflicts with using title, literal title or auto number') -		if opts.usetitle and opts.useliteral: -			parser.error(u'using title conflicts with using literal title') -		if opts.username is not None and opts.password is None: -			opts.password = getpass.getpass(u'Type account password and press return:') -		if opts.ratelimit is not None: -			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) -			if numeric_limit is None: -				parser.error(u'invalid rate limit specified') -			opts.ratelimit = numeric_limit -		if opts.retries is not None: -			try: -				opts.retries = long(opts.retries) -			except (TypeError, ValueError), err: -				parser.error(u'invalid retry count specified')  		try: -			opts.playliststart = long(opts.playliststart) -			if opts.playliststart <= 0: -				raise ValueError -		except (TypeError, ValueError), err: -			parser.error(u'invalid playlist start number specified') +			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) +			out,err = sp.communicate() +			return int(out.split()[1]) +		except: +			pass +		return None + +	max_width = 80 +	max_help_position = 80 + +	# No need to wrap help messages if we're on a wide console +	columns = _find_term_columns() +	if columns: max_width = columns + +	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) +	fmt.format_option_strings = _format_option_string + +	kw = { +		'version'   : __version__, +		'formatter' : fmt, +		'usage' : '%prog [options] url [url...]', +		'conflict_handler' : 'resolve', +	} + +	parser = optparse.OptionParser(**kw) + +	# option groups +	general        = optparse.OptionGroup(parser, 'General Options') +	selection      = optparse.OptionGroup(parser, 'Video Selection') +	authentication = optparse.OptionGroup(parser, 'Authentication Options') +	video_format   = optparse.OptionGroup(parser, 'Video Format Options') +	postproc       = optparse.OptionGroup(parser, 'Post-processing Options') +	filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') +	verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + +	general.add_option('-h', '--help', +			action='help', help='print this help text and exit') +	general.add_option('-v', '--version', +			action='version', help='print program version and exit') +	general.add_option('-U', '--update', +			action='store_true', dest='update_self', help='update this program to latest version') +	general.add_option('-i', '--ignore-errors', +			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) +	general.add_option('-r', '--rate-limit', +			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') +	general.add_option('-R', '--retries', +			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) +	general.add_option('--dump-user-agent', +			action='store_true', dest='dump_user_agent', +			help='display the current browser identification', default=False) +	general.add_option('--list-extractors', +			action='store_true', dest='list_extractors', +			help='List all supported extractors and the URLs they would handle', default=False) + +	selection.add_option('--playlist-start', +			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) +	selection.add_option('--playlist-end', +			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) +	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') +	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') + +	authentication.add_option('-u', '--username', +			dest='username', metavar='USERNAME', help='account username') +	authentication.add_option('-p', '--password', +			dest='password', metavar='PASSWORD', help='account password') +	authentication.add_option('-n', '--netrc', +			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + +	video_format.add_option('-f', '--format', +			action='store', dest='format', metavar='FORMAT', help='video format code') +	video_format.add_option('--all-formats', +			action='store_const', dest='format', help='download all available video formats', const='-1') +	video_format.add_option('--max-quality', +			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + + +	verbosity.add_option('-q', '--quiet', +			action='store_true', dest='quiet', help='activates quiet mode', default=False) +	verbosity.add_option('-s', '--simulate', +			action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) +	verbosity.add_option('--skip-download', +			action='store_true', dest='skip_download', help='do not download the video', default=False) +	verbosity.add_option('-g', '--get-url', +			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) +	verbosity.add_option('-e', '--get-title', +			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) +	verbosity.add_option('--get-thumbnail', +			action='store_true', dest='getthumbnail', +			help='simulate, quiet but print thumbnail URL', default=False) +	verbosity.add_option('--get-description', +			action='store_true', dest='getdescription', +			help='simulate, quiet but print video description', default=False) +	verbosity.add_option('--get-filename', +			action='store_true', dest='getfilename', +			help='simulate, quiet but print output filename', default=False) +	verbosity.add_option('--get-format', +			action='store_true', dest='getformat', +			help='simulate, quiet but print output format', default=False) +	verbosity.add_option('--no-progress', +			action='store_true', dest='noprogress', help='do not print progress bar', default=False) +	verbosity.add_option('--console-title', +			action='store_true', dest='consoletitle', +			help='display progress in console titlebar', default=False) + + +	filesystem.add_option('-t', '--title', +			action='store_true', dest='usetitle', help='use title in file name', default=False) +	filesystem.add_option('-l', '--literal', +			action='store_true', dest='useliteral', help='use literal title in file name', default=False) +	filesystem.add_option('-A', '--auto-number', +			action='store_true', dest='autonumber', +			help='number downloaded files starting from 00000', default=False) +	filesystem.add_option('-o', '--output', +			dest='outtmpl', metavar='TEMPLATE', help='output filename template') +	filesystem.add_option('-a', '--batch-file', +			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') +	filesystem.add_option('-w', '--no-overwrites', +			action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) +	filesystem.add_option('-c', '--continue', +			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) +	filesystem.add_option('--cookies', +			dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') +	filesystem.add_option('--no-part', +			action='store_true', dest='nopart', help='do not use .part files', default=False) +	filesystem.add_option('--no-mtime', +			action='store_false', dest='updatetime', +			help='do not use the Last-modified header to set the file modification time', default=True) +	filesystem.add_option('--write-description', +			action='store_true', dest='writedescription', +			help='write video description to a .description file', default=False) +	filesystem.add_option('--write-info-json', +			action='store_true', dest='writeinfojson', +			help='write video metadata to a .info.json file', default=False) + + +	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, +			help='convert video files to audio-only files (requires ffmpeg and ffprobe)') +	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', +			help='"best", "aac" or "mp3"; best by default') +	postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', +			help='ffmpeg audio bitrate specification, 128k by default') +	postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, +			help='keeps the video file on disk after the post-processing; the video is erased by default') + + +	parser.add_option_group(general) +	parser.add_option_group(selection) +	parser.add_option_group(filesystem) +	parser.add_option_group(verbosity) +	parser.add_option_group(video_format) +	parser.add_option_group(authentication) +	parser.add_option_group(postproc) + +	opts, args = parser.parse_args() + +	return parser, opts, args + +def gen_extractors(): +	""" Return a list of an instance of every supported extractor. +	The order does matter; the first extractor matched is the one handling the URL. +	""" +	youtube_ie = YoutubeIE() +	google_ie = GoogleIE() +	yahoo_ie = YahooIE() +	return [ +		youtube_ie, +		MetacafeIE(youtube_ie), +		DailymotionIE(), +		YoutubePlaylistIE(youtube_ie), +		YoutubeUserIE(youtube_ie), +		YoutubeSearchIE(youtube_ie), +		google_ie, +		GoogleSearchIE(google_ie), +		PhotobucketIE(), +		yahoo_ie, +		YahooSearchIE(yahoo_ie), +		DepositFilesIE(), +		FacebookIE(), +		BlipTVIE(), +		VimeoIE(), +		MyVideoIE(), +		ComedyCentralIE(), +		EscapistIE(), + +		GenericIE() +	] + +def main(): +	parser, opts, args = parseOpts() + +	# Open appropriate CookieJar +	if opts.cookiefile is None: +		jar = cookielib.CookieJar() +	else:  		try: -			opts.playlistend = long(opts.playlistend) -			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): -				raise ValueError -		except (TypeError, ValueError), err: -			parser.error(u'invalid playlist end number specified') -		if opts.extractaudio: -			if opts.audioformat not in ['best', 'aac', 'mp3']: -				parser.error(u'invalid audio format specified') - -		# Information extractors -		youtube_ie = YoutubeIE() -		metacafe_ie = MetacafeIE(youtube_ie) -		dailymotion_ie = DailymotionIE() -		youtube_pl_ie = YoutubePlaylistIE(youtube_ie) -		youtube_user_ie = YoutubeUserIE(youtube_ie) -		youtube_search_ie = YoutubeSearchIE(youtube_ie) -		google_ie = GoogleIE() -		google_search_ie = GoogleSearchIE(google_ie) -		photobucket_ie = PhotobucketIE() -		yahoo_ie = YahooIE() -		yahoo_search_ie = YahooSearchIE(yahoo_ie) -		deposit_files_ie = DepositFilesIE() -		facebook_ie = FacebookIE() -		generic_ie = GenericIE() - -		# File downloader -		fd = FileDownloader({ -			'usenetrc': opts.usenetrc, -			'username': opts.username, -			'password': opts.password, -			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), -			'forceurl': opts.geturl, -			'forcetitle': opts.gettitle, -			'forcethumbnail': opts.getthumbnail, -			'forcedescription': opts.getdescription, -			'forcefilename': opts.getfilename, -			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), -			'format': opts.format, -			'format_limit': opts.format_limit, -			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) -				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') -				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') -				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') -				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') -				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') -				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') -				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') -				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') -				or u'%(id)s.%(ext)s'), -			'ignoreerrors': opts.ignoreerrors, -			'ratelimit': opts.ratelimit, -			'nooverwrites': opts.nooverwrites, -			'retries': opts.retries, -			'continuedl': opts.continue_dl, -			'noprogress': opts.noprogress, -			'playliststart': opts.playliststart, -			'playlistend': opts.playlistend, -			'logtostderr': opts.outtmpl == '-', -			'consoletitle': opts.consoletitle, -			'nopart': opts.nopart, -			'updatetime': opts.updatetime, -			}) -		fd.add_info_extractor(youtube_search_ie) -		fd.add_info_extractor(youtube_pl_ie) -		fd.add_info_extractor(youtube_user_ie) -		fd.add_info_extractor(metacafe_ie) -		fd.add_info_extractor(dailymotion_ie) -		fd.add_info_extractor(youtube_ie) -		fd.add_info_extractor(google_ie) -		fd.add_info_extractor(google_search_ie) -		fd.add_info_extractor(photobucket_ie) -		fd.add_info_extractor(yahoo_ie) -		fd.add_info_extractor(yahoo_search_ie) -		fd.add_info_extractor(deposit_files_ie) -		fd.add_info_extractor(facebook_ie) - -		# This must come last since it's the -		# fallback if none of the others work -		fd.add_info_extractor(generic_ie) - -		# PostProcessors -		if opts.extractaudio: -			fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat,preferredquality=opts.audioquality,keepvideo=opts.keepvideo)) - -		# Update version -		if opts.update_self: -			update_self(fd, sys.argv[0]) - -		# Maybe do nothing -		if len(all_urls) < 1: -			if not opts.update_self: -				parser.error(u'you must provide at least one URL') +			jar = cookielib.MozillaCookieJar(opts.cookiefile) +			if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): +				jar.load() +		except (IOError, OSError), err: +			sys.exit(u'ERROR: unable to open cookie file') + +	# Dump user agent +	if opts.dump_user_agent: +		print std_headers['User-Agent'] +		sys.exit(0) + +	# Batch file verification +	batchurls = [] +	if opts.batchfile is not None: +		try: +			if opts.batchfile == '-': +				batchfd = sys.stdin  			else: -				sys.exit() -		retcode = fd.download(all_urls) +				batchfd = open(opts.batchfile, 'r') +			batchurls = batchfd.readlines() +			batchurls = [x.strip() for x in batchurls] +			batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] +		except IOError: +			sys.exit(u'ERROR: batch file could not be read') +	all_urls = batchurls + args + +	# General configuration +	cookie_processor = urllib2.HTTPCookieProcessor(jar) +	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) +	urllib2.install_opener(opener) +	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + +	extractors = gen_extractors() + +	if opts.list_extractors: +		for ie in extractors: +			print(ie.IE_NAME) +			matchedUrls = filter(lambda url: ie.suitable(url), all_urls) +			all_urls = filter(lambda url: url not in matchedUrls, all_urls) +			for mu in matchedUrls: +				print(u'  ' + mu) +		sys.exit(0) + +	# Conflicting, missing and erroneous options +	if opts.usenetrc and (opts.username is not None or opts.password is not None): +		parser.error(u'using .netrc conflicts with giving username/password') +	if opts.password is not None and opts.username is None: +		parser.error(u'account username missing') +	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): +		parser.error(u'using output template conflicts with using title, literal title or auto number') +	if opts.usetitle and opts.useliteral: +		parser.error(u'using title conflicts with using literal title') +	if opts.username is not None and opts.password is None: +		opts.password = getpass.getpass(u'Type account password and press return:') +	if opts.ratelimit is not None: +		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) +		if numeric_limit is None: +			parser.error(u'invalid rate limit specified') +		opts.ratelimit = numeric_limit +	if opts.retries is not None: +		try: +			opts.retries = long(opts.retries) +		except (TypeError, ValueError), err: +			parser.error(u'invalid retry count specified') +	try: +		opts.playliststart = int(opts.playliststart) +		if opts.playliststart <= 0: +			raise ValueError(u'Playlist start must be positive') +	except (TypeError, ValueError), err: +		parser.error(u'invalid playlist start number specified') +	try: +		opts.playlistend = int(opts.playlistend) +		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): +			raise ValueError(u'Playlist end must be greater than playlist start') +	except (TypeError, ValueError), err: +		parser.error(u'invalid playlist end number specified') +	if opts.extractaudio: +		if opts.audioformat not in ['best', 'aac', 'mp3']: +			parser.error(u'invalid audio format specified') + +	# File downloader +	fd = FileDownloader({ +		'usenetrc': opts.usenetrc, +		'username': opts.username, +		'password': opts.password, +		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), +		'forceurl': opts.geturl, +		'forcetitle': opts.gettitle, +		'forcethumbnail': opts.getthumbnail, +		'forcedescription': opts.getdescription, +		'forcefilename': opts.getfilename, +		'forceformat': opts.getformat, +		'simulate': opts.simulate, +		'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), +		'format': opts.format, +		'format_limit': opts.format_limit, +		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) +			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') +			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') +			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') +			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') +			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') +			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') +			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') +			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') +			or u'%(id)s.%(ext)s'), +		'ignoreerrors': opts.ignoreerrors, +		'ratelimit': opts.ratelimit, +		'nooverwrites': opts.nooverwrites, +		'retries': opts.retries, +		'continuedl': opts.continue_dl, +		'noprogress': opts.noprogress, +		'playliststart': opts.playliststart, +		'playlistend': opts.playlistend, +		'logtostderr': opts.outtmpl == '-', +		'consoletitle': opts.consoletitle, +		'nopart': opts.nopart, +		'updatetime': opts.updatetime, +		'writedescription': opts.writedescription, +		'writeinfojson': opts.writeinfojson, +		'matchtitle': opts.matchtitle, +		'rejecttitle': opts.rejecttitle, +		}) +	for extractor in extractors: +		fd.add_info_extractor(extractor) + +	# PostProcessors +	if opts.extractaudio: +		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo)) + +	# Update version +	if opts.update_self: +		updateSelf(fd, sys.argv[0]) + +	# Maybe do nothing +	if len(all_urls) < 1: +		if not opts.update_self: +			parser.error(u'you must provide at least one URL') +		else: +			sys.exit() +	retcode = fd.download(all_urls) -		# Dump cookie jar if requested -		if opts.cookiefile is not None: -			try: -				jar.save() -			except (IOError, OSError), err: -				sys.exit(u'ERROR: unable to save cookie jar') +	# Dump cookie jar if requested +	if opts.cookiefile is not None: +		try: +			jar.save() +		except (IOError, OSError), err: +			sys.exit(u'ERROR: unable to save cookie jar') + +	sys.exit(retcode) -		sys.exit(retcode) +if __name__ == '__main__': +	try: +		main()  	except DownloadError:  		sys.exit(1)  	except SameFileError:  		sys.exit(u'ERROR: fixed output name but more than one file to download')  	except KeyboardInterrupt:  		sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: | 
