aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Taylor <user234683@users.noreply.github.com>2018-07-11 01:31:44 -0700
committerJames Taylor <user234683@users.noreply.github.com>2018-07-11 01:31:44 -0700
commitaea11c407f7be904037183da152c4cdf561cd65e (patch)
tree5fad52326feee68c3771becee44fdf24ceebca05
parent2b308ec9f0fd1ba296db7bcdcea9f93aebf6eaa1 (diff)
downloadyt-local-aea11c407f7be904037183da152c4cdf561cd65e.tar.lz
yt-local-aea11c407f7be904037183da152c4cdf561cd65e.tar.xz
yt-local-aea11c407f7be904037183da152c4cdf561cd65e.zip
track custom youtube-dl distribution
-rw-r--r--.gitignore2
-rw-r--r--youtube_dl/YoutubeDL.py2392
-rw-r--r--youtube_dl/__init__.py481
-rw-r--r--youtube_dl/__main__.py19
-rw-r--r--youtube_dl/aes.py361
-rw-r--r--youtube_dl/cache.py96
-rw-r--r--youtube_dl/compat.py3016
-rw-r--r--youtube_dl/downloader/__init__.py61
-rw-r--r--youtube_dl/downloader/common.py389
-rw-r--r--youtube_dl/downloader/dash.py80
-rw-r--r--youtube_dl/downloader/external.py354
-rw-r--r--youtube_dl/downloader/f4m.py438
-rw-r--r--youtube_dl/downloader/fragment.py268
-rw-r--r--youtube_dl/downloader/hls.py204
-rw-r--r--youtube_dl/downloader/http.py354
-rw-r--r--youtube_dl/downloader/ism.py259
-rw-r--r--youtube_dl/downloader/rtmp.py214
-rw-r--r--youtube_dl/downloader/rtsp.py47
-rw-r--r--youtube_dl/extractor/__init__.py46
-rw-r--r--youtube_dl/extractor/adobepass.py1567
-rw-r--r--youtube_dl/extractor/common.py2862
-rw-r--r--youtube_dl/extractor/commonmistakes.py50
-rw-r--r--youtube_dl/extractor/commonprotocols.py60
-rw-r--r--youtube_dl/extractor/extractors.py31
-rw-r--r--youtube_dl/extractor/generic.py3335
-rw-r--r--youtube_dl/extractor/openload.py379
-rw-r--r--youtube_dl/extractor/youtube.py2914
-rw-r--r--youtube_dl/jsinterp.py262
-rw-r--r--youtube_dl/options.py916
-rw-r--r--youtube_dl/postprocessor/__init__.py40
-rw-r--r--youtube_dl/postprocessor/common.py69
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py93
-rw-r--r--youtube_dl/postprocessor/execafterdownload.py31
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py613
-rw-r--r--youtube_dl/postprocessor/metadatafromtitle.py48
-rw-r--r--youtube_dl/postprocessor/xattrpp.py79
-rw-r--r--youtube_dl/socks.py273
-rw-r--r--youtube_dl/swfinterp.py834
-rw-r--r--youtube_dl/update.py187
-rw-r--r--youtube_dl/utils.py3911
-rw-r--r--youtube_dl/version.py3
41 files changed, 27637 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 8ff3fab..0c11b02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
__pycache__/
*.py[cod]
*$py.class
-youtube_dl/
+youtube_dl_old/
debug/
data/
banned_addresses.txt
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
new file mode 100644
index 0000000..38ba43a
--- /dev/null
+++ b/youtube_dl/YoutubeDL.py
@@ -0,0 +1,2392 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import absolute_import, unicode_literals
+
+import collections
+import contextlib
+import copy
+import datetime
+import errno
+import fileinput
+import io
+import itertools
+import json
+import locale
+import operator
+import os
+import platform
+import re
+import shutil
+import subprocess
+import socket
+import sys
+import time
+import tokenize
+import traceback
+import random
+
+from string import ascii_letters
+
+from .compat import (
+ compat_basestring,
+ compat_cookiejar,
+ compat_get_terminal_size,
+ compat_http_client,
+ compat_kwargs,
+ compat_numeric_types,
+ compat_os_name,
+ compat_str,
+ compat_tokenize_tokenize,
+ compat_urllib_error,
+ compat_urllib_request,
+ compat_urllib_request_DataHandler,
+)
+from .utils import (
+ age_restricted,
+ args_to_str,
+ ContentTooShortError,
+ date_from_str,
+ DateRange,
+ DEFAULT_OUTTMPL,
+ determine_ext,
+ determine_protocol,
+ DownloadError,
+ encode_compat_str,
+ encodeFilename,
+ error_to_compat_str,
+ expand_path,
+ ExtractorError,
+ format_bytes,
+ formatSeconds,
+ GeoRestrictedError,
+ int_or_none,
+ ISO3166Utils,
+ locked_file,
+ make_HTTPS_handler,
+ MaxDownloadsReached,
+ orderedSet,
+ PagedList,
+ parse_filesize,
+ PerRequestProxyHandler,
+ platform_name,
+ PostProcessingError,
+ preferredencoding,
+ prepend_extension,
+ register_socks_protocols,
+ render_table,
+ replace_extension,
+ SameFileError,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ sanitized_Request,
+ std_headers,
+ subtitles_filename,
+ UnavailableVideoError,
+ url_basename,
+ version_tuple,
+ write_json_file,
+ write_string,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+)
+from .cache import Cache
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
+from .downloader import get_suitable_downloader
+from .downloader.rtmp import rtmpdump_version
+from .postprocessor import (
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ get_postprocessor,
+)
+from .version import __version__
+
+if compat_os_name == 'nt':
+ import ctypes
+
+
+class YoutubeDL(object):
+ """YoutubeDL class.
+
+ YoutubeDL objects are the ones responsible of downloading the
+ actual video file and writing it to disk if the user has requested
+ it, among some other tasks. In most cases there should be one per
+ program. As, given a video URL, the downloader doesn't know how to
+ extract all the needed information, task that InfoExtractors do, it
+ has to pass the URL to one of them.
+
+ For this, YoutubeDL objects have a method that allows
+ InfoExtractors to be registered in a given order. When it is passed
+ a URL, the YoutubeDL object handles it to the first InfoExtractor it
+ finds that reports being able to handle it. The InfoExtractor extracts
+ all the information about the video or videos the URL refers to, and
+ YoutubeDL process the extracted information, possibly using a File
+ Downloader to download the video.
+
+ YoutubeDL objects accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead. These options are available through the params
+ attribute for the InfoExtractors to use. The YoutubeDL also
+ registers itself as the downloader in charge for the InfoExtractors
+ that are added to it, so this is a "mutual registration".
+
+ Available options:
+
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ videopassword: Password for accessing a video.
+ ap_mso: Adobe Pass multiple-system operator identifier.
+ ap_username: Multiple-system operator account username.
+ ap_password: Multiple-system operator account password.
+ usenetrc: Use netrc for authentication instead.
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ no_warnings: Do not print out anything for warnings.
+ forceurl: Force printing final URL.
+ forcetitle: Force printing title.
+ forceid: Force printing ID.
+ forcethumbnail: Force printing thumbnail URL.
+ forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
+ forceduration: Force printing duration.
+ forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
+ simulate: Do not download the video files.
+ format: Video format code. See options.py for more information.
+ outtmpl: Template for output names.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ ignoreerrors: Do not stop on download errors.
+ force_generic_extractor: Force downloader to use the generic extractor
+ nooverwrites: Prevent overwriting files.
+ playliststart: Playlist item to start at.
+ playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
+ playlistreverse: Download playlist items in reverse order.
+ playlistrandom: Download playlist items in random order.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
+ logtostderr: Log messages to stderr instead of stdout.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video description to a .info.json file
+ writeannotations: Write the video annotations to a .annotations.xml file
+ writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
+ writesubtitles: Write the video subtitles to a file
+ writeautomaticsub: Write the automatically generated subtitles to a file
+ allsubtitles: Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ listsubtitles: Lists all available subtitles for the video
+ subtitlesformat: The format code for subtitles
+ subtitleslangs: List of languages of the subtitles to download
+ keepvideo: Keep the video file after post-processing
+ daterange: A DateRange object, download only if the upload_date is in the range.
+ skip_download: Skip the actual download of the video file
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
+ noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
+ min_views: An integer representing the minimum view count the video
+ must have in order to not be skipped.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ max_views: An integer representing the maximum view count.
+ Videos that are more popular than that are not
+ downloaded.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ download_archive: File name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded
+ again.
+ cookiefile: File name where cookies should be read from and dumped to.
+ nocheckcertificate:Do not verify SSL certificates
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ At the moment, this is only supported by YouTube.
+ proxy: URL of the proxy server to use
+ geo_verification_proxy: URL of the proxy to use for IP address verification
+ on geo-restricted sites.
+ socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+ support, using fridibi
+ debug_printtraffic:Print out sent and received HTTP traffic
+ include_ads: Download ads as well
+ default_search: Prepend this string if an input url is not valid.
+ 'auto' for elaborate guessing
+ encoding: Use this encoding instead of the system-specified.
+ extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ youtube_dl/postprocessor/__init__.py for a list.
+ as well as any further keyword arguments for the
+ postprocessor.
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+
+ If status is one of "downloading", or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
+
+ Progress hooks are guaranteed to be called at least once
+ (with status "finished") if the download is successful.
+ merge_output_format: Extension to use when merging formats.
+ fixup: Automatically correct known faults of the file.
+ One of:
+ - "never": do nothing
+ - "warn": only emit a warning
+ - "detect_or_warn": check whether we can do anything
+ about it, warn otherwise (default)
+ source_address: Client-side IP address to bind to.
+ call_home: Boolean, true iff we are allowed to contact the
+ youtube-dl servers for debugging.
+ sleep_interval: Number of seconds to sleep before each download when
+ used alone or a lower bound of a range for randomized
+ sleep before each download (minimum possible number
+ of seconds to sleep) when used along with
+ max_sleep_interval.
+ max_sleep_interval:Upper bound of a range for randomized sleep before each
+ download (maximum possible number of seconds to sleep).
+ Must only be used along with sleep_interval.
+ Actual sleep time will be a random float from range
+ [sleep_interval; max_sleep_interval].
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example for this.
+ no_color: Do not emit color codes in output.
+ geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
+ HTTP header
+ geo_bypass_country:
+ Two-letter ISO 3166-2 country code that will be used for
+ explicit geographic restriction bypassing via faking
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+ hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+
+ The following parameters are not used by YoutubeDL itself, they are used by
+ the downloader (see youtube_dl/downloader/common.py):
+ nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+ http_chunk_size.
+
+ The following options are used by the post processors:
+ prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
+
+ The following options are used by the Youtube extractor:
+ youtube_include_dash_manifest: If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH.
+ """
+
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
+ params = None
+ _ies = []
+ _pps = []
+ _download_retcode = None
+ _num_downloads = None
+ _screen_file = None
+
+ def __init__(self, params=None, auto_init=True):
+ """Create a FileDownloader object with the given options."""
+ if params is None:
+ params = {}
+ self._ies = []
+ self._ies_instances = {}
+ self._pps = []
+ self._progress_hooks = []
+ self._download_retcode = 0
+ self._num_downloads = 0
+ self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+ self._err_file = sys.stderr
+ self.params = {
+ # Default parameters
+ 'nocheckcertificate': False,
+ }
+ self.params.update(params)
+ self.cache = Cache(self)
+
+ def check_deprecated(param, option, suggestion):
+ if self.params.get(param) is not None:
+ self.report_warning(
+ '%s is deprecated. Use %s instead.' % (option, suggestion))
+ return True
+ return False
+
+ if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
+ if self.params.get('geo_verification_proxy') is None:
+ self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+
+ check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
+ check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+ check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+
+ if params.get('bidi_workaround', False):
+ try:
+ import pty
+ master, slave = pty.openpty()
+ width = compat_get_terminal_size().columns
+ if width is None:
+ width_args = []
+ else:
+ width_args = ['-w', str(width)]
+ sp_kwargs = dict(
+ stdin=subprocess.PIPE,
+ stdout=slave,
+ stderr=self._err_file)
+ try:
+ self._output_process = subprocess.Popen(
+ ['bidiv'] + width_args, **sp_kwargs
+ )
+ except OSError:
+ self._output_process = subprocess.Popen(
+ ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
+ except OSError as ose:
+ if ose.errno == errno.ENOENT:
+ self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ else:
+ raise
+
+ if (sys.platform != 'win32' and
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+ not params.get('restrictfilenames', False)):
+ # Unicode filesystem API will throw errors (#1474, #13027)
+ self.report_warning(
+ 'Assuming --restrict-filenames since file system encoding '
+ 'cannot encode all characters. '
+ 'Set the LC_ALL environment variable to fix this.')
+ self.params['restrictfilenames'] = True
+
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+
+ self._setup_opener()
+
+ if auto_init:
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_class = get_postprocessor(pp_def_raw['key'])
+ pp_def = dict(pp_def_raw)
+ del pp_def['key']
+ pp = pp_class(self, **compat_kwargs(pp_def))
+ self.add_post_processor(pp)
+
+ for ph in self.params.get('progress_hooks', []):
+ self.add_progress_hook(ph)
+
+ register_socks_protocols()
+
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+ if idxs:
+ correct_argv = (
+ ['youtube-dl'] +
+ [a for i, a in enumerate(argv) if i not in idxs] +
+ ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ args_to_str(correct_argv))
+
+ def add_info_extractor(self, ie):
+ """Add an InfoExtractor object to the end of the list."""
+ self._ies.append(ie)
+ if not isinstance(ie, type):
+ self._ies_instances[ie.ie_key()] = ie
+ ie.set_downloader(self)
+
+ def get_info_extractor(self, ie_key):
+ """
+ Get an instance of an IE with name ie_key, it will try to get one from
+ the _ies list, if there's no instance it will create a new one and add
+ it to the extractor list.
+ """
+ ie = self._ies_instances.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)()
+ self.add_info_extractor(ie)
+ return ie
+
+ def add_default_info_extractors(self):
+ """
+ Add the InfoExtractors returned by gen_extractors to the end of the list
+ """
+ for ie in gen_extractor_classes():
+ self.add_info_extractor(ie)
+
+ def add_post_processor(self, pp):
+ """Add a PostProcessor object to the end of the chain."""
+ self._pps.append(pp)
+ pp.set_downloader(self)
+
+ def add_progress_hook(self, ph):
+ """Add the progress hook (currently only for the file downloader)"""
+ self._progress_hooks.append(ph)
+
+ def _bidi_workaround(self, message):
+ if not hasattr(self, '_output_channel'):
+ return message
+
+ assert hasattr(self, '_output_process')
+ assert isinstance(message, compat_str)
+ line_count = message.count('\n') + 1
+ self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.flush()
+ res = ''.join(self._output_channel.readline().decode('utf-8')
+ for _ in range(line_count))
+ return res[:-len('\n')]
+
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode."""
+ return self.to_stdout(message, skip_eol, check_quiet=True)
+
+ def _write_string(self, s, out=None):
+ write_string(s, out=out, encoding=self.params.get('encoding'))
+
+ def to_stdout(self, message, skip_eol=False, check_quiet=False):
+ """Print message to stdout if not in quiet mode."""
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ elif not check_quiet or not self.params.get('quiet', False):
+ message = self._bidi_workaround(message)
+ terminator = ['\n', ''][skip_eol]
+ output = message + terminator
+
+ self._write_string(output, self._screen_file)
+
+ def to_stderr(self, message):
+ """Print message to stderr."""
+ assert isinstance(message, compat_str)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ message = self._bidi_workaround(message)
+ output = message + '\n'
+ self._write_string(output, self._err_file)
+
+ def to_console_title(self, message):
+ if not self.params.get('consoletitle', False):
+ return
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ elif 'TERM' in os.environ:
+ self._write_string('\033]0;%s\007' % message, self._screen_file)
+
+ def save_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Save the title on stack
+ self._write_string('\033[22;0t', self._screen_file)
+
+ def restore_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Restore the title from stack
+ self._write_string('\033[23;0t', self._screen_file)
+
+ def __enter__(self):
+ self.save_console_title()
+ return self
+
+ def __exit__(self, *args):
+ self.restore_console_title()
+
+ if self.params.get('cookiefile') is not None:
+ self.cookiejar.save()
+
+ def trouble(self, message=None, tb=None):
+ """Determine action to take when a download problem appears.
+
+ Depending on if the downloader has been configured to ignore
+ download errors or not, this method may throw an exception or
+ not when errors are found, after printing the message.
+
+ tb, if given, is additional traceback information.
+ """
+ if message is not None:
+ self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ if sys.exc_info()[0]: # if .trouble has been called from an except block
+ tb = ''
+ if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+ tb += encode_compat_str(traceback.format_exc())
+ else:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = ''.join(tb_data)
+ self.to_stderr(tb)
+ if not self.params.get('ignoreerrors', False):
+ if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ exc_info = sys.exc_info()[1].exc_info
+ else:
+ exc_info = sys.exc_info()
+ raise DownloadError(message, exc_info)
+ self._download_retcode = 1
+
+ def report_warning(self, message):
+ '''
+ Print the message to stderr, it will be prefixed with 'WARNING:'
+ If stderr is a tty file the 'WARNING:' will be colored
+ '''
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(message)
+ else:
+ if self.params.get('no_warnings'):
+ return
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;33mWARNING:\033[0m'
+ else:
+ _msg_header = 'WARNING:'
+ warning_message = '%s %s' % (_msg_header, message)
+ self.to_stderr(warning_message)
+
+ def report_error(self, message, tb=None):
+ '''
+ Do the same as trouble, but prefixes the message with 'ERROR:', colored
+ in red if stderr is a tty file.
+ '''
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;31mERROR:\033[0m'
+ else:
+ _msg_header = 'ERROR:'
+ error_message = '%s %s' % (_msg_header, message)
+ self.trouble(error_message, tb)
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def prepare_filename(self, info_dict):
+ """Generate the output filename."""
+ try:
+ template_dict = dict(info_dict)
+
+ template_dict['epoch'] = int(time.time())
+ autonumber_size = self.params.get('autonumber_size')
+ if autonumber_size is None:
+ autonumber_size = 5
+ template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ if template_dict.get('resolution') is None:
+ if template_dict.get('width') and template_dict.get('height'):
+ template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+ elif template_dict.get('height'):
+ template_dict['resolution'] = '%sp' % template_dict['height']
+ elif template_dict.get('width'):
+ template_dict['resolution'] = '%dx?' % template_dict['width']
+
+ sanitize = lambda k, v: sanitize_filename(
+ compat_str(v),
+ restricted=self.params.get('restrictfilenames'),
+ is_id=(k == 'id' or k.endswith('_id')))
+ template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
+ for k, v in template_dict.items()
+ if v is not None and not isinstance(v, (list, tuple, dict)))
+ template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+
+ # For fields playlist_index and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': len(str(template_dict['n_entries'])),
+ 'autonumber': autonumber_size,
+ }
+ FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+ mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+ if mobj:
+ outtmpl = re.sub(
+ FIELD_SIZE_COMPAT_RE,
+ r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+ outtmpl)
+
+ # Missing numeric fields used together with integer presentation types
+ # in format specification will break the argument substitution since
+ # string 'NA' is returned for missing fields. We will patch output
+ # template for missing fields to meet string presentation type.
+ for numeric_field in self._NUMERIC_FIELDS:
+ if numeric_field not in template_dict:
+ # As of [1] format syntax is:
+ # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+ # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+ FORMAT_RE = r'''(?x)
+ (?<!%)
+ %
+ \({0}\) # mapping key
+ (?:[#0\-+ ]+)? # conversion flags (optional)
+ (?:\d+)? # minimum field width (optional)
+ (?:\.\d+)? # precision (optional)
+ [hlL]? # length modifier (optional)
+ [diouxXeEfFgGcrs%] # conversion type
+ '''
+ outtmpl = re.sub(
+ FORMAT_RE.format(numeric_field),
+ r'%({0})s'.format(numeric_field), outtmpl)
+
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ filename = encodeFilename(filename, True).decode(preferredencoding())
+ return sanitize_path(filename)
+ except ValueError as err:
+ self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ return None
+
+ def _match_entry(self, info_dict, incomplete):
+ """ Returns None iff the file should be downloaded """
+
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+ if self.in_download_archive(info_dict):
+ return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
+ return None
+
+ @staticmethod
+ def add_extra_info(info_dict, extra_info):
+ '''Set the keys from extra_info in info dict if they are missing'''
+ for key, value in extra_info.items():
+ info_dict.setdefault(key, value)
+
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ process=True, force_generic_extractor=False):
+ '''
+ Returns a list with a dictionary for each video we find.
+ If 'download', also downloads the videos.
+ extra_info is a dict containing the extra values to add to each result
+ '''
+
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
+
+ if ie_key:
+ ies = [self.get_info_extractor(ie_key)]
+ else:
+ ies = self._ies
+
+ for ie in ies:
+ if not ie.suitable(url):
+ continue
+
+ ie = self.get_info_extractor(ie.ie_key())
+ if not ie.working():
+ self.report_warning('The program functionality for this site has been marked as broken, '
+ 'and will probably not work.')
+
+ try:
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ break
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ break
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(compat_str(e), e.format_traceback())
+ break
+ except MaxDownloadsReached:
+ raise
+ except Exception as e:
+ if self.params.get('ignoreerrors', False):
+ self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
+ break
+ else:
+ raise
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def add_default_extra_info(self, ie_result, ie, url):
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
+ 'extractor_key': ie.ie_key(),
+ })
+
+ def process_ie_result(self, ie_result, download=True, extra_info={}):
+ """
+ Take the result of the ie(may be modified) and resolve all unresolved
+ references (URLs, playlist items).
+
+ It will also download the videos if 'download'.
+ Returns the resolved ie_result.
+ """
+ result_type = ie_result.get('_type', 'video')
+
+ if result_type in ('url', 'url_transparent'):
+ ie_result['url'] = sanitize_url(ie_result['url'])
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+ extract_flat is True):
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(ie_result))
+ return ie_result
+
+ if result_type == 'video':
+ self.add_extra_info(ie_result, extra_info)
+ return self.process_video_result(ie_result, download=download)
+ elif result_type == 'url':
+ # We have to add extra_info to the results because it may be
+ # contained in a playlist
+ return self.extract_info(ie_result['url'],
+ download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ # extract_info may return None when ignoreerrors is enabled and
+ # extraction failed with an error, don't crash and return early
+ # in this case
+ if not info:
+ return info
+
+ force_properties = dict(
+ (k, v) for k, v in ie_result.items() if v is not None)
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
+ if f in force_properties:
+ del force_properties[f]
+ new_result = info.copy()
+ new_result.update(force_properties)
+
+ # Extracted info may not be a video result (i.e.
+ # info.get('_type', 'video') != video) but rather an url or
+ # url_transparent. In such cases outer metadata (from ie_result)
+ # should be propagated to inner one (info). For this to happen
+ # _type of info should be overridden with url_transparent. This
+ # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+ if new_result.get('_type') == 'url':
+ new_result['_type'] = 'url_transparent'
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
+ elif result_type in ('playlist', 'multi_video'):
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ playlist_results = []
+
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ reason = self._match_entry(entry, incomplete=True)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ continue
+
+ entry_result = self.process_ie_result(entry,
+ download=download,
+ extra_info=extra)
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+ elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
+
+ def _fixup(r):
+ self.add_extra_info(
+ r,
+ {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+ )
+ return r
+ ie_result['entries'] = [
+ self.process_ie_result(_fixup(r), download, extra_info)
+ for r in ie_result['entries']
+ ]
+ return ie_result
+ else:
+ raise Exception('Invalid result type: %s' % result_type)
+
+ def _build_format_filter(self, filter_spec):
+ " Returns a function to filter the formats according to the filter_spec "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+ $
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.search(filter_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), filter_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
+ }
+ str_operator_rex = re.compile(r'''(?x)
+ \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9._-]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(filter_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
+
+ if not m:
+ raise ValueError('Invalid filter specification %r' % filter_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ return _filter
+
+ def _default_format_spec(self, info_dict, download=True):
+
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ def prefer_best():
+ if self.params.get('simulate', False):
+ return False
+ if not download:
+ return False
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return True
+ if info_dict.get('is_live'):
+ return True
+ if not can_merge():
+ return True
+ return False
+
+ req_format_list = ['bestvideo+bestaudio', 'best']
+ if prefer_best():
+ req_format_list.reverse()
+ return '/'.join(req_format_list)
+
+ def build_format_selector(self, format_spec):
+ def syntax_error(note, start):
+ message = (
+ 'Invalid format specification: '
+ '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ return SyntaxError(message)
+
+ PICKFIRST = 'PICKFIRST'
+ MERGE = 'MERGE'
+ SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
+ FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+ def _parse_filter(tokens):
+ filter_parts = []
+ for type, string, start, _, _ in tokens:
+ if type == tokenize.OP and string == ']':
+ return ''.join(filter_parts)
+ else:
+ filter_parts.append(string)
+
+ def _remove_unused_ops(tokens):
+ # Remove operators that we don't use and join them with the surrounding strings
+ # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string, start, end, line in tokens:
+ if type == tokenize.OP and string == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string, start, end, line in tokens:
+ yield type, string, start, end, line
+ if type == tokenize.OP and string == ']':
+ break
+ elif type == tokenize.OP and string in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string
+ last_start = start
+ last_end = end
+ else:
+ last_string += string
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
+ selectors = []
+ current_selector = None
+ for type, string, start, _, _ in tokens:
+ # ENCODING is only defined in python 3.x
+ if type == getattr(tokenize, 'ENCODING', None):
+ continue
+ elif type in [tokenize.NAME, tokenize.NUMBER]:
+ current_selector = FormatSelector(SINGLE, string, [])
+ elif type == tokenize.OP:
+ if string == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
+ break
+ elif inside_merge and string in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string == ',':
+ tokens.restore_last_token()
+ break
+ elif string == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
+ selectors.append(current_selector)
+ current_selector = None
+ elif string == '/':
+ if not current_selector:
+ raise syntax_error('"/" must follow a format selector', start)
+ first_choice = current_selector
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
+ elif string == '[':
+ if not current_selector:
+ current_selector = FormatSelector(SINGLE, 'best', [])
+ format_filter = _parse_filter(tokens)
+ current_selector.filters.append(format_filter)
+ elif string == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
+ elif string == '+':
+ video_selector = current_selector
+ audio_selector = _parse_format_selection(tokens, inside_merge=True)
+ if not video_selector or not audio_selector:
+ raise syntax_error('"+" must be between two format selectors', start)
+ current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+ else:
+ raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ elif type == tokenize.ENDMARKER:
+ break
+ if current_selector:
+ selectors.append(current_selector)
+ return selectors
+
+ def _build_selector_function(selector):
+ if isinstance(selector, list):
+ fs = [_build_selector_function(s) for s in selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ for format in f(ctx):
+ yield format
+ return selector_function
+ elif selector.type == GROUP:
+ selector_function = _build_selector_function(selector.selector)
+ elif selector.type == PICKFIRST:
+ fs = [_build_selector_function(s) for s in selector.selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ picked_formats = list(f(ctx))
+ if picked_formats:
+ return picked_formats
+ return []
+ elif selector.type == SINGLE:
+ format_spec = selector.selector
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ if format_spec == 'all':
+ for f in formats:
+ yield f
+ elif format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ yield audiovideo_formats[format_idx]
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) we will fallback to best/worst
+ # {video,audio}-only format
+ elif ctx['incomplete_formats']:
+ yield formats[format_idx]
+ elif format_spec == 'bestaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[-1]
+ elif format_spec == 'worstaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[0]
+ elif format_spec == 'bestvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[-1]
+ elif format_spec == 'worstvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[0]
+ else:
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
+ if format_spec in extensions:
+ filter_f = lambda f: f['ext'] == format_spec
+ else:
+ filter_f = lambda f: f['format_id'] == format_spec
+ matches = list(filter(filter_f, formats))
+ if matches:
+ yield matches[-1]
+ elif selector.type == MERGE:
+ def _merge(formats_info):
+ format_1, format_2 = [f['format_id'] for f in formats_info]
+ # The first format must contain the video and the
+ # second the audio
+ if formats_info[0].get('vcodec') == 'none':
+ self.report_error('The first format must '
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
+ return
+ # Formats must be opposite (video+audio)
+ if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+ self.report_error(
+ 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+ % (format_1, format_2))
+ return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
+ return {
+ 'requested_formats': formats_info,
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'stretched_ratio': formats_info[0].get('stretched_ratio'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
+ }
+ video_selector, audio_selector = map(_build_selector_function, selector.selector)
+
+ def selector_function(ctx):
+ for pair in itertools.product(
+ video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ yield _merge(pair)
+
+ filters = [self._build_format_filter(f) for f in selector.filters]
+
+ def final_selector(ctx):
+ ctx_copy = copy.deepcopy(ctx)
+ for _filter in filters:
+ ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+ return selector_function(ctx_copy)
+ return final_selector
+
+ stream = io.BytesIO(format_spec.encode('utf-8'))
+ try:
+ tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+ class TokenIterator(object):
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.counter = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ next = __next__
+
+ def restore_last_token(self):
+ self.counter -= 1
+
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
+ return _build_selector_function(parsed_selector)
+
+ def _calc_headers(self, info_dict):
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ if 'X-Forwarded-For' not in res:
+ x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+ if x_forwarded_for_ip:
+ res['X-Forwarded-For'] = x_forwarded_for_ip
+
+ return res
+
+ def _calc_cookies(self, info_dict):
+ pr = sanitized_Request(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.get_header('Cookie')
+
+ def process_video_result(self, info_dict, download=True):
+ assert info_dict.get('_type', 'video') == 'video'
+
+ if 'id' not in info_dict:
+ raise ExtractorError('Missing "id" field in extractor result')
+ if 'title' not in info_dict:
+ raise ExtractorError('Missing "title" field in extractor result')
+
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '', t.get('url')))
+ for i, t in enumerate(thumbnails):
+ t['url'] = sanitize_url(t['url'])
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
+
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnail'] = sanitize_url(thumbnail)
+ elif thumbnails:
+ info_dict['thumbnail'] = thumbnails[-1]['url']
+
+ if 'display_id' not in info_dict and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
+ if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+ info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+ automatic_captions = info_dict.get('automatic_captions')
+ subtitles = info_dict.get('subtitles')
+
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
+ return
+
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], subtitles, automatic_captions)
+
+ # We now pick which formats have to be downloaded
+ if info_dict.get('formats') is None:
+ # There's only one format available
+ formats = [info_dict]
+ else:
+ formats = info_dict['formats']
+
+ if not formats:
+ raise ExtractorError('No video formats found!')
+
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
+
+ formats_dict = {}
+
+ # We check that all the formats have the format and format_id fields
+ for i, format in enumerate(formats):
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
+ format['url'] = sanitize_url(format['url'])
+ if not format.get('format_id'):
+ format['format_id'] = compat_str(i)
+ else:
+ # Sanitize format_id from characters used in format selector expression
+ format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
+ format_id = format['format_id']
+ if format_id not in formats_dict:
+ formats_dict[format_id] = []
+ formats_dict[format_id].append(format)
+
+ # Make sure all formats have unique format_id
+ for format_id, ambiguous_formats in formats_dict.items():
+ if len(ambiguous_formats) > 1:
+ for i, format in enumerate(ambiguous_formats):
+ format['format_id'] = '%s-%d' % (format_id, i)
+
+ for i, format in enumerate(formats):
+ if format.get('format') is None:
+ format['format'] = '{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ )
+ # Automatically determine file extension if missing
+ if format.get('ext') is None:
+ format['ext'] = determine_ext(format['url']).lower()
+ # Automatically determine protocol if missing (useful for format
+ # selection purposes)
+ if format.get('protocol') is None:
+ format['protocol'] = determine_protocol(format)
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
+ # Remove private housekeeping stuff
+ if '__x_forwarded_for_ip' in info_dict:
+ del info_dict['__x_forwarded_for_ip']
+
+ # TODO Central sorting goes here
+
+ if formats[0] is not info_dict:
+ # only set the 'formats' fields if the original info_dict list them
+ # otherwise we end up with a circular reference, the first (and unique)
+ # element in the 'formats' field in info_dict is info_dict itself,
+ # which can't be exported to json
+ info_dict['formats'] = formats
+ if self.params.get('listformats'):
+ self.list_formats(info_dict)
+ return
+
+ req_format = self.params.get('format')
+ if req_format is None:
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Default format spec: %s' % req_format)
+
+ format_selector = self.build_format_selector(req_format)
+
+ # While in format selection we may need to have an access to the original
+ # format set in order to calculate some metrics or do some processing.
+ # For now we need to be able to guess whether original formats provided
+ # by extractor are incomplete or not (i.e. whether extractor provides only
+ # video-only or audio-only formats) for proper formats selection for
+ # extractors with such incomplete formats (see
+ # https://github.com/rg3/youtube-dl/pull/5556).
+ # Since formats may be filtered during format selection and may not match
+ # the original formats the results may be incorrect. Thus original formats
+ # or pre-calculated metrics should be passed to format selection routines
+ # as well.
+ # We will pass a context object containing all necessary additional data
+ # instead of just formats.
+ # This fixes incorrect format selection issue (see
+ # https://github.com/rg3/youtube-dl/issues/10083).
+ incomplete_formats = (
+ # All formats are video-only or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+ # all formats are audio-only
+ all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+
+ ctx = {
+ 'formats': formats,
+ 'incomplete_formats': incomplete_formats,
+ }
+
+ formats_to_download = list(format_selector(ctx))
+ if not formats_to_download:
+ raise ExtractorError('requested format not available',
+ expected=True)
+
+ if download:
+ if len(formats_to_download) > 1:
+ self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+ for format in formats_to_download:
+ new_info = dict(info_dict)
+ new_info.update(format)
+ self.process_info(new_info)
+ # We update the info dict with the best quality format (backwards compatibility)
+ info_dict.update(formats_to_download[-1])
+ return info_dict
+
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if (not self.params.get('writesubtitles') and not
+ self.params.get('writeautomaticsub') or not
+ available_subs):
+ return None
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
+ def process_info(self, info_dict):
+ """Process a single resolved IE result."""
+
+ assert info_dict.get('_type', 'video') == 'video'
+
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None:
+ if self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
+
+ info_dict['fulltitle'] = info_dict['title']
+ if len(info_dict['title']) > 200:
+ info_dict['title'] = info_dict['title'][:197] + '...'
+
+ if 'format' not in info_dict:
+ info_dict['format'] = info_dict['ext']
+
+ reason = self._match_entry(info_dict, incomplete=False)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ return
+
+ self._num_downloads += 1
+
+ info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+
+ # Forced printings
+ if self.params.get('forcetitle', False):
+ self.to_stdout(info_dict['fulltitle'])
+ if self.params.get('forceid', False):
+ self.to_stdout(info_dict['id'])
+ if self.params.get('forceurl', False):
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
+ self.to_stdout(info_dict['thumbnail'])
+ if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
+ self.to_stdout(info_dict['description'])
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ if self.params.get('forceformat', False):
+ self.to_stdout(info_dict['format'])
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(info_dict))
+
+ # Do nothing else if in simulate mode
+ if self.params.get('simulate', False):
+ return
+
+ if filename is None:
+ return
+
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ return
+
+ if self.params.get('writedescription', False):
+ descfn = replace_extension(filename, 'description', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Video description is already present')
+ elif info_dict.get('description') is None:
+ self.report_warning('There\'s no description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(info_dict['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write description file ' + descfn)
+ return
+
+ if self.params.get('writeannotations', False):
+ annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ self.to_screen('[info] Video annotations are already present')
+ else:
+ try:
+ self.to_screen('[info] Writing video annotations to: ' + annofn)
+ with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ annofile.write(info_dict['annotations'])
+ except (KeyError, TypeError):
+ self.report_warning('There are no annotations to write.')
+ except (OSError, IOError):
+ self.report_error('Cannot write annotations file: ' + annofn)
+ return
+
+ subtitles_are_requested = any([self.params.get('writesubtitles', False),
+ self.params.get('writeautomaticsub')])
+
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
+ # subtitles download errors are already managed as troubles in relevant IE
+ # that way it will silently go on when used with unsupporting IE
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ else:
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/rg3/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
+ else:
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
+
+ if self.params.get('writeinfojson', False):
+ infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Video description metadata is already present')
+ else:
+ self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ try:
+ write_json_file(self.filter_requested_info(info_dict), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write metadata to JSON file ' + infofn)
+ return
+
+ self._write_thumbnails(info_dict, filename)
+
+ if not self.params.get('skip_download', False):
+ try:
+ def dl(name, info):
+ fd = get_suitable_downloader(info, self.params)(self, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+ return fd.download(name, info)
+
+ if info_dict.get('requested_formats') is not None:
+ downloaded = []
+ success = True
+ merger = FFmpegMergerPP(self)
+ if not merger.available:
+ postprocessors = []
+ self.report_warning('You have requested multiple '
+ 'formats but ffmpeg or avconv are not installed.'
+ ' The formats won\'t be merged.')
+ else:
+ postprocessors = [merger]
+
+ def compatible_formats(formats):
+ video, audio = formats
+ # Check extension
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
+ if video_ext and audio_ext:
+ COMPATIBLE_EXTS = (
+ ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
+ ('webm')
+ )
+ for exts in COMPATIBLE_EXTS:
+ if video_ext in exts and audio_ext in exts:
+ return True
+ # TODO: Check acodec/vcodec
+ return False
+
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == info_dict['ext']
+ else filename)
+ requested_formats = info_dict['requested_formats']
+ if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
+ # Ensure filename always has a correct extension for successful merge
+ filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ if os.path.exists(encodeFilename(filename)):
+ self.to_screen(
+ '[download] %s has already been downloaded and '
+ 'merged' % filename)
+ else:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ new_info.update(f)
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
+ downloaded.append(fname)
+ partial_success = dl(fname, new_info)
+ success = success and partial_success
+ info_dict['__postprocessors'] = postprocessors
+ info_dict['__files_to_merge'] = downloaded
+ else:
+ # Just a single file
+ success = dl(filename, info_dict)
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_error('unable to download video data: %s' % error_to_compat_str(err))
+ return
+ except (OSError, IOError) as err:
+ raise UnavailableVideoError(err)
+ except (ContentTooShortError, ) as err:
+ self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ return
+
+ if success and filename != '-':
+ # Fixup content
+ fixup_policy = self.params.get('fixup')
+ if fixup_policy is None:
+ fixup_policy = 'detect_or_warn'
+
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
+ stretched_ratio = info_dict.get('stretched_ratio')
+ if stretched_ratio is not None and stretched_ratio != 1:
+ if fixup_policy == 'warn':
+ self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
+ info_dict['id'], stretched_ratio))
+ elif fixup_policy == 'detect_or_warn':
+ stretched_pp = FFmpegFixupStretchedPP(self)
+ if stretched_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(stretched_pp)
+ else:
+ self.report_warning(
+ '%s: Non-uniform pixel ratio (%s). %s'
+ % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('requested_formats') is None and
+ info_dict.get('container') == 'm4a_dash'):
+ if fixup_policy == 'warn':
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container.'
+ % info_dict['id'])
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM4aPP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('protocol') == 'm3u8_native' or
+ info_dict.get('protocol') == 'm3u8' and
+ self.params.get('hls_prefer_native')):
+ if fixup_policy == 'warn':
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
+ info_dict['id']))
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM3u8PP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: malformed AAC bitstream detected. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ try:
+ self.post_process(filename, info_dict)
+ except (PostProcessingError) as err:
+ self.report_error('postprocessing: %s' % str(err))
+ return
+ self.record_download_archive(info_dict)
+
+ def download(self, url_list):
+ """Download a given list of URLs."""
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ if (len(url_list) > 1 and
+ outtmpl != '-' and
+ '%' not in outtmpl and
+ self.params.get('max_downloads') != 1):
+ raise SameFileError(outtmpl)
+
+ for url in url_list:
+ try:
+ # It also downloads the videos
+ res = self.extract_info(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
+ except UnavailableVideoError:
+ self.report_error('unable to download video')
+ except MaxDownloadsReached:
+ self.to_screen('[info] Maximum number of downloaded files reached.')
+ raise
+ else:
+ if self.params.get('dump_single_json', False):
+ self.to_stdout(json.dumps(res))
+
+ return self._download_retcode
+
+ def download_with_info_file(self, info_filename):
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, we can't call json.load
+ info = self.filter_requested_info(json.loads('\n'.join(f)))
+ try:
+ self.process_ie_result(info, download=True)
+ except DownloadError:
+ webpage_url = info.get('webpage_url')
+ if webpage_url is not None:
+ self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+ return self.download([webpage_url])
+ else:
+ raise
+ return self._download_retcode
+
+ @staticmethod
+ def filter_requested_info(info_dict):
+ return dict(
+ (k, v) for k, v in info_dict.items()
+ if k not in ['requested_formats', 'requested_subtitles'])
+
+ def post_process(self, filename, ie_info):
+ """Run all the postprocessors on the given file."""
+ info = dict(ie_info)
+ info['filepath'] = filename
+ pps_chain = []
+ if ie_info.get('__postprocessors') is not None:
+ pps_chain.extend(ie_info['__postprocessors'])
+ pps_chain.extend(self._pps)
+ for pp in pps_chain:
+ files_to_delete = []
+ try:
+ files_to_delete, info = pp.run(info)
+ except PostProcessingError as e:
+ self.report_error(e.msg)
+ if files_to_delete and not self.params.get('keepvideo', False):
+ for old_filename in files_to_delete:
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+
+ def _make_archive_id(self, info_dict):
+ # Future-proof against any change in case
+ # and backwards compatibility with prior versions
+ extractor = info_dict.get('extractor_key')
+ if extractor is None:
+ if 'id' in info_dict:
+ extractor = info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ return None # Incomplete video information
+ return extractor.lower() + ' ' + info_dict['id']
+
+ def in_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return False
+
+ vid_id = self._make_archive_id(info_dict)
+ if vid_id is None:
+ return False # Incomplete video information
+
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ if line.strip() == vid_id:
+ return True
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = self._make_archive_id(info_dict)
+ assert vid_id
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + '\n')
+
+ @staticmethod
+ def format_resolution(format, default='unknown'):
+ if format.get('vcodec') == 'none':
+ return 'audio only'
+ if format.get('resolution') is not None:
+ return format['resolution']
+ if format.get('height') is not None:
+ if format.get('width') is not None:
+ res = '%sx%s' % (format['width'], format['height'])
+ else:
+ res = '%sp' % format['height']
+ elif format.get('width') is not None:
+ res = '%dx?' % format['width']
+ else:
+ res = default
+ return res
+
+ def _format_note(self, fdict):
+ res = ''
+ if fdict.get('ext') in ['f4f', 'f4m']:
+ res += '(unsupported) '
+ if fdict.get('language'):
+ if res:
+ res += ' '
+ res += '[%s] ' % fdict['language']
+ if fdict.get('format_note') is not None:
+ res += fdict['format_note'] + ' '
+ if fdict.get('tbr') is not None:
+ res += '%4dk ' % fdict['tbr']
+ if fdict.get('container') is not None:
+ if res:
+ res += ', '
+ res += '%s container' % fdict['container']
+ if (fdict.get('vcodec') is not None and
+ fdict.get('vcodec') != 'none'):
+ if res:
+ res += ', '
+ res += fdict['vcodec']
+ if fdict.get('vbr') is not None:
+ res += '@'
+ elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+ res += 'video@'
+ if fdict.get('vbr') is not None:
+ res += '%4dk' % fdict['vbr']
+ if fdict.get('fps') is not None:
+ if res:
+ res += ', '
+ res += '%sfps' % fdict['fps']
+ if fdict.get('acodec') is not None:
+ if res:
+ res += ', '
+ if fdict['acodec'] == 'none':
+ res += 'video only'
+ else:
+ res += '%-5s' % fdict['acodec']
+ elif fdict.get('abr') is not None:
+ if res:
+ res += ', '
+ res += 'audio'
+ if fdict.get('abr') is not None:
+ res += '@%3dk' % fdict['abr']
+ if fdict.get('asr') is not None:
+ res += ' (%5dHz)' % fdict['asr']
+ if fdict.get('filesize') is not None:
+ if res:
+ res += ', '
+ res += format_bytes(fdict['filesize'])
+ elif fdict.get('filesize_approx') is not None:
+ if res:
+ res += ', '
+ res += '~' + format_bytes(fdict['filesize_approx'])
+ return res
+
+ def list_formats(self, info_dict):
+ formats = info_dict.get('formats', [info_dict])
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ if len(formats) > 1:
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
+
+ header_line = ['format code', 'extension', 'resolution', 'note']
+ self.to_screen(
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
+
+ def list_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if not thumbnails:
+ self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+ return
+
+ self.to_screen(
+ '[info] Thumbnails for %s:' % info_dict['id'])
+ self.to_screen(render_table(
+ ['ID', 'width', 'height', 'URL'],
+ [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
+ def urlopen(self, req):
+ """ Start an HTTP download """
+ if isinstance(req, compat_basestring):
+ req = sanitized_Request(req)
+ return self._opener.open(req, timeout=self._socket_timeout)
+
+ def print_debug_header(self):
+ if not self.params.get('verbose'):
+ return
+
+ if type('') is not compat_str:
+ # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+ self.report_warning(
+ 'Your Python is broken! Update to a newer and supported version')
+
+ stdout_encoding = getattr(
+ sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+ encoding_str = (
+ '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+ locale.getpreferredencoding(),
+ sys.getfilesystemencoding(),
+ stdout_encoding,
+ self.get_encoding()))
+ write_string(encoding_str, encoding=None)
+
+ self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+ if _LAZY_LOADER:
+ self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ try:
+ sp = subprocess.Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ self._write_string('[debug] Git HEAD: ' + out + '\n')
+ except Exception:
+ try:
+ sys.exc_clear()
+ except Exception:
+ pass
+
+ def python_implementation():
+ impl_name = platform.python_implementation()
+ if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ return impl_name
+
+ self._write_string('[debug] Python version %s (%s) - %s\n' % (
+ platform.python_version(), python_implementation(),
+ platform_name()))
+
+ exe_versions = FFmpegPostProcessor.get_versions(self)
+ exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
+ exe_str = ', '.join(
+ '%s %s' % (exe, v)
+ for exe, v in sorted(exe_versions.items())
+ if v
+ )
+ if not exe_str:
+ exe_str = 'none'
+ self._write_string('[debug] exe versions: %s\n' % exe_str)
+
+ proxy_map = {}
+ for handler in self._opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+
+ if self.params.get('call_home', False):
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ latest_version = self.urlopen(
+ 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ if version_tuple(latest_version) > version_tuple(__version__):
+ self.report_warning(
+ 'You are using an outdated version (newest version: %s)! '
+ 'See https://yt-dl.org/update if you need help updating.' %
+ latest_version)
+
+ def _setup_opener(self):
+ timeout_val = self.params.get('socket_timeout')
+ self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
+
+ opts_cookiefile = self.params.get('cookiefile')
+ opts_proxy = self.params.get('proxy')
+
+ if opts_cookiefile is None:
+ self.cookiejar = compat_cookiejar.CookieJar()
+ else:
+ opts_cookiefile = expand_path(opts_cookiefile)
+ self.cookiejar = compat_cookiejar.MozillaCookieJar(
+ opts_cookiefile)
+ if os.access(opts_cookiefile, os.R_OK):
+ self.cookiejar.load()
+
+ cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
+ if opts_proxy is not None:
+ if opts_proxy == '':
+ proxies = {}
+ else:
+ proxies = {'http': opts_proxy, 'https': opts_proxy}
+ else:
+ proxies = compat_urllib_request.getproxies()
+ # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+ proxy_handler = PerRequestProxyHandler(proxies)
+
+ debuglevel = 1 if self.params.get('debug_printtraffic') else 0
+ https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+ ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ data_handler = compat_urllib_request_DataHandler()
+
+ # When passing our own FileHandler instance, build_opener won't add the
+ # default FileHandler and allows us to disable the file protocol, which
+ # can be used for malicious purposes (see
+ # https://github.com/rg3/youtube-dl/issues/8227)
+ file_handler = compat_urllib_request.FileHandler()
+
+ def file_open(*args, **kwargs):
+ raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
+ file_handler.file_open = file_open
+
+ opener = compat_urllib_request.build_opener(
+ proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ self._opener = opener
+
+ def encode(self, s):
+ if isinstance(s, bytes):
+ return s # Already encoded
+
+ try:
+ return s.encode(self.get_encoding())
+ except UnicodeEncodeError as err:
+ err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+ raise
+
+ def get_encoding(self):
+ encoding = self.params.get('encoding')
+ if encoding is None:
+ encoding = preferredencoding()
+ return encoding
+
+ def _write_thumbnails(self, info_dict, filename):
+ if self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails:
+ thumbnails = [thumbnails[-1]]
+ elif self.params.get('write_all_thumbnails', False):
+ thumbnails = info_dict.get('thumbnails')
+ else:
+ return
+
+ if not thumbnails:
+ # No thumbnails present, so return immediately
+ return
+
+ for t in thumbnails:
+ thumb_ext = determine_ext(t['url'], 'jpg')
+ suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+ t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ self.to_screen('[%s] %s: Thumbnail %sis already present' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ else:
+ self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ uf = self.urlopen(t['url'])
+ with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download thumbnail "%s": %s' %
+ (t['url'], error_to_compat_str(err)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
new file mode 100644
index 0000000..ba435ea
--- /dev/null
+++ b/youtube_dl/__init__.py
@@ -0,0 +1,481 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+__license__ = 'Public Domain'
+
+import codecs
+import io
+import os
+import random
+import sys
+
+
+from .options import (
+ parseOpts,
+)
+from .compat import (
+ compat_getpass,
+ compat_shlex_split,
+ workaround_optparse_bug9161,
+)
+from .utils import (
+ DateRange,
+ decodeOption,
+ DEFAULT_OUTTMPL,
+ DownloadError,
+ expand_path,
+ match_filter_func,
+ MaxDownloadsReached,
+ preferredencoding,
+ read_batch_urls,
+ SameFileError,
+ setproctitle,
+ std_headers,
+ write_string,
+ render_table,
+)
+from .update import update_self
+from .downloader import (
+ FileDownloader,
+)
+from .extractor import gen_extractors, list_extractors
+from .extractor.adobepass import MSO_INFO
+from .YoutubeDL import YoutubeDL
+
+
+def _real_main(argv=None):
+ # Compatibility fixes for Windows
+ if sys.platform == 'win32':
+ # https://github.com/rg3/youtube-dl/issues/820
+ codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+
+ workaround_optparse_bug9161()
+
+ setproctitle('youtube-dl')
+
+ parser, opts, args = parseOpts(argv)
+
+ # Set user agent
+ if opts.user_agent is not None:
+ std_headers['User-Agent'] = opts.user_agent
+
+ # Set referer
+ if opts.referer is not None:
+ std_headers['Referer'] = opts.referer
+
+ # Custom HTTP headers
+ if opts.headers is not None:
+ for h in opts.headers:
+ if ':' not in h:
+ parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
+ key, value = h.split(':', 1)
+ if opts.verbose:
+ write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
+ std_headers[key] = value
+
+ # Dump user agent
+ if opts.dump_user_agent:
+ write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Batch file verification
+ batch_urls = []
+ if opts.batchfile is not None:
+ try:
+ if opts.batchfile == '-':
+ batchfd = sys.stdin
+ else:
+ batchfd = io.open(
+ expand_path(opts.batchfile),
+ 'r', encoding='utf-8', errors='ignore')
+ batch_urls = read_batch_urls(batchfd)
+ if opts.verbose:
+ write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
+ except IOError:
+ sys.exit('ERROR: batch file could not be read')
+ all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
+ _enc = preferredencoding()
+ all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
+
+ if opts.list_extractors:
+ for ie in list_extractors(opts.age_limit):
+ write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+ matchedUrls = [url for url in all_urls if ie.suitable(url)]
+ for mu in matchedUrls:
+ write_string(' ' + mu + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.list_extractor_descriptions:
+ for ie in list_extractors(opts.age_limit):
+ if not ie._WORKING:
+ continue
+ desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+ if desc is False:
+ continue
+ if hasattr(ie, 'SEARCH_KEY'):
+ _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
+ _COUNTS = ('', '5', '10', 'all')
+ desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+ write_string(desc + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.ap_list_mso:
+ table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
+ write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Conflicting, missing and erroneous options
+ if opts.usenetrc and (opts.username is not None or opts.password is not None):
+ parser.error('using .netrc conflicts with giving username/password')
+ if opts.password is not None and opts.username is None:
+ parser.error('account username missing\n')
+ if opts.ap_password is not None and opts.ap_username is None:
+ parser.error('TV Provider account username missing\n')
+ if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
+ parser.error('using output template conflicts with using title, video ID or auto number')
+ if opts.autonumber_size is not None:
+ if opts.autonumber_size <= 0:
+ parser.error('auto number size must be positive')
+ if opts.autonumber_start is not None:
+ if opts.autonumber_start < 0:
+ parser.error('auto number start must be positive or 0')
+ if opts.usetitle and opts.useid:
+ parser.error('using title conflicts with using video ID')
+ if opts.username is not None and opts.password is None:
+ opts.password = compat_getpass('Type account password and press [Return]: ')
+ if opts.ap_username is not None and opts.ap_password is None:
+ opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+ if opts.ratelimit is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
+ if numeric_limit is None:
+ parser.error('invalid rate limit specified')
+ opts.ratelimit = numeric_limit
+ if opts.min_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
+ if numeric_limit is None:
+ parser.error('invalid min_filesize specified')
+ opts.min_filesize = numeric_limit
+ if opts.max_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
+ if numeric_limit is None:
+ parser.error('invalid max_filesize specified')
+ opts.max_filesize = numeric_limit
+ if opts.sleep_interval is not None:
+ if opts.sleep_interval < 0:
+ parser.error('sleep interval must be positive or 0')
+ if opts.max_sleep_interval is not None:
+ if opts.max_sleep_interval < 0:
+ parser.error('max sleep interval must be positive or 0')
+ if opts.max_sleep_interval < opts.sleep_interval:
+ parser.error('max sleep interval must be greater than or equal to min sleep interval')
+ else:
+ opts.max_sleep_interval = opts.sleep_interval
+ if opts.ap_mso and opts.ap_mso not in MSO_INFO:
+ parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+
+ def parse_retries(retries):
+ if retries in ('inf', 'infinite'):
+ parsed_retries = float('inf')
+ else:
+ try:
+ parsed_retries = int(retries)
+ except (TypeError, ValueError):
+ parser.error('invalid retry count specified')
+ return parsed_retries
+ if opts.retries is not None:
+ opts.retries = parse_retries(opts.retries)
+ if opts.fragment_retries is not None:
+ opts.fragment_retries = parse_retries(opts.fragment_retries)
+ if opts.buffersize is not None:
+ numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
+ if numeric_buffersize is None:
+ parser.error('invalid buffer size specified')
+ opts.buffersize = numeric_buffersize
+ if opts.http_chunk_size is not None:
+ numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
+ if not numeric_chunksize:
+ parser.error('invalid http chunk size specified')
+ opts.http_chunk_size = numeric_chunksize
+ if opts.playliststart <= 0:
+ raise ValueError('Playlist start must be positive')
+ if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
+ raise ValueError('Playlist end must be greater than playlist start')
+ if opts.extractaudio:
+ if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+ parser.error('invalid audio format specified')
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip('k').strip('K')
+ if not opts.audioquality.isdigit():
+ parser.error('invalid audio quality specified')
+ if opts.recodevideo is not None:
+ if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
+ parser.error('invalid video recode format specified')
+ if opts.convertsubtitles is not None:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+ parser.error('invalid subtitle format specified')
+
+ if opts.date is not None:
+ date = DateRange.day(opts.date)
+ else:
+ date = DateRange(opts.dateafter, opts.datebefore)
+
+ # Do not download videos when there are audio-only formats
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ opts.format = 'bestaudio/best'
+
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+ (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+ (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+ (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+ (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+ (opts.useid and '%(id)s.%(ext)s') or
+ (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+ DEFAULT_OUTTMPL)
+ if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
+ parser.error('Cannot download a video and extract audio into the same'
+ ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
+ ' template'.format(outtmpl))
+
+ any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ any_printing = opts.print_json
+ download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+
+ # PostProcessors
+ postprocessors = []
+ if opts.metafromtitle:
+ postprocessors.append({
+ 'key': 'MetadataFromTitle',
+ 'titleformat': opts.metafromtitle
+ })
+ if opts.extractaudio:
+ postprocessors.append({
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': opts.audioformat,
+ 'preferredquality': opts.audioquality,
+ 'nopostoverwrites': opts.nopostoverwrites,
+ })
+ if opts.recodevideo:
+ postprocessors.append({
+ 'key': 'FFmpegVideoConvertor',
+ 'preferedformat': opts.recodevideo,
+ })
+ # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+ # FFmpegExtractAudioPP as containers before conversion may not support
+ # metadata (3gp, webm, etc.)
+ # And this post-processor should be placed before other metadata
+ # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
+ # extra metadata. By default ffmpeg preserves metadata applicable for both
+ # source and target containers. From this point the container won't change,
+ # so metadata can be added here.
+ if opts.addmetadata:
+ postprocessors.append({'key': 'FFmpegMetadata'})
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ })
+ if opts.embedsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegEmbedSubtitle',
+ })
+ if opts.embedthumbnail:
+ already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+ postprocessors.append({
+ 'key': 'EmbedThumbnail',
+ 'already_have_thumbnail': already_have_thumbnail
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
+ # XAttrMetadataPP should be run after post-processors that may change file
+ # contents
+ if opts.xattrs:
+ postprocessors.append({'key': 'XAttrMetadata'})
+ # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
+ # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
+ if opts.exec_cmd:
+ postprocessors.append({
+ 'key': 'ExecAfterDownload',
+ 'exec_cmd': opts.exec_cmd,
+ })
+ external_downloader_args = None
+ if opts.external_downloader_args:
+ external_downloader_args = compat_shlex_split(opts.external_downloader_args)
+ postprocessor_args = None
+ if opts.postprocessor_args:
+ postprocessor_args = compat_shlex_split(opts.postprocessor_args)
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
+
+ ydl_opts = {
+ 'usenetrc': opts.usenetrc,
+ 'username': opts.username,
+ 'password': opts.password,
+ 'twofactor': opts.twofactor,
+ 'videopassword': opts.videopassword,
+ 'ap_mso': opts.ap_mso,
+ 'ap_username': opts.ap_username,
+ 'ap_password': opts.ap_password,
+ 'quiet': (opts.quiet or any_getting or any_printing),
+ 'no_warnings': opts.no_warnings,
+ 'forceurl': opts.geturl,
+ 'forcetitle': opts.gettitle,
+ 'forceid': opts.getid,
+ 'forcethumbnail': opts.getthumbnail,
+ 'forcedescription': opts.getdescription,
+ 'forceduration': opts.getduration,
+ 'forcefilename': opts.getfilename,
+ 'forceformat': opts.getformat,
+ 'forcejson': opts.dumpjson or opts.print_json,
+ 'dump_single_json': opts.dump_single_json,
+ 'simulate': opts.simulate or any_getting,
+ 'skip_download': opts.skip_download,
+ 'format': opts.format,
+ 'listformats': opts.listformats,
+ 'outtmpl': outtmpl,
+ 'autonumber_size': opts.autonumber_size,
+ 'autonumber_start': opts.autonumber_start,
+ 'restrictfilenames': opts.restrictfilenames,
+ 'ignoreerrors': opts.ignoreerrors,
+ 'force_generic_extractor': opts.force_generic_extractor,
+ 'ratelimit': opts.ratelimit,
+ 'nooverwrites': opts.nooverwrites,
+ 'retries': opts.retries,
+ 'fragment_retries': opts.fragment_retries,
+ 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+ 'keep_fragments': opts.keep_fragments,
+ 'buffersize': opts.buffersize,
+ 'noresizebuffer': opts.noresizebuffer,
+ 'http_chunk_size': opts.http_chunk_size,
+ 'continuedl': opts.continue_dl,
+ 'noprogress': opts.noprogress,
+ 'progress_with_newline': opts.progress_with_newline,
+ 'playliststart': opts.playliststart,
+ 'playlistend': opts.playlistend,
+ 'playlistreverse': opts.playlist_reverse,
+ 'playlistrandom': opts.playlist_random,
+ 'noplaylist': opts.noplaylist,
+ 'logtostderr': opts.outtmpl == '-',
+ 'consoletitle': opts.consoletitle,
+ 'nopart': opts.nopart,
+ 'updatetime': opts.updatetime,
+ 'writedescription': opts.writedescription,
+ 'writeannotations': opts.writeannotations,
+ 'writeinfojson': opts.writeinfojson,
+ 'writethumbnail': opts.writethumbnail,
+ 'write_all_thumbnails': opts.write_all_thumbnails,
+ 'writesubtitles': opts.writesubtitles,
+ 'writeautomaticsub': opts.writeautomaticsub,
+ 'allsubtitles': opts.allsubtitles,
+ 'listsubtitles': opts.listsubtitles,
+ 'subtitlesformat': opts.subtitlesformat,
+ 'subtitleslangs': opts.subtitleslangs,
+ 'matchtitle': decodeOption(opts.matchtitle),
+ 'rejecttitle': decodeOption(opts.rejecttitle),
+ 'max_downloads': opts.max_downloads,
+ 'prefer_free_formats': opts.prefer_free_formats,
+ 'verbose': opts.verbose,
+ 'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
+ 'test': opts.test,
+ 'keepvideo': opts.keepvideo,
+ 'min_filesize': opts.min_filesize,
+ 'max_filesize': opts.max_filesize,
+ 'min_views': opts.min_views,
+ 'max_views': opts.max_views,
+ 'daterange': date,
+ 'cachedir': opts.cachedir,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': download_archive_fn,
+ 'cookiefile': opts.cookiefile,
+ 'nocheckcertificate': opts.no_check_certificate,
+ 'prefer_insecure': opts.prefer_insecure,
+ 'proxy': opts.proxy,
+ 'socket_timeout': opts.socket_timeout,
+ 'bidi_workaround': opts.bidi_workaround,
+ 'debug_printtraffic': opts.debug_printtraffic,
+ 'prefer_ffmpeg': opts.prefer_ffmpeg,
+ 'include_ads': opts.include_ads,
+ 'default_search': opts.default_search,
+ 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+ 'encoding': opts.encoding,
+ 'extract_flat': opts.extract_flat,
+ 'mark_watched': opts.mark_watched,
+ 'merge_output_format': opts.merge_output_format,
+ 'postprocessors': postprocessors,
+ 'fixup': opts.fixup,
+ 'source_address': opts.source_address,
+ 'call_home': opts.call_home,
+ 'sleep_interval': opts.sleep_interval,
+ 'max_sleep_interval': opts.max_sleep_interval,
+ 'external_downloader': opts.external_downloader,
+ 'list_thumbnails': opts.list_thumbnails,
+ 'playlist_items': opts.playlist_items,
+ 'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
+ 'no_color': opts.no_color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
+ 'hls_use_mpegts': opts.hls_use_mpegts,
+ 'external_downloader_args': external_downloader_args,
+ 'postprocessor_args': postprocessor_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
+ 'geo_verification_proxy': opts.geo_verification_proxy,
+ 'config_location': opts.config_location,
+ 'geo_bypass': opts.geo_bypass,
+ 'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
+ # just for deprecation check
+ 'autonumber': opts.autonumber if opts.autonumber is True else None,
+ 'usetitle': opts.usetitle if opts.usetitle is True else None,
+ }
+
+ with YoutubeDL(ydl_opts) as ydl:
+ # Update version
+ if opts.update_self:
+ update_self(ydl.to_screen, opts.verbose, ydl._opener)
+
+ # Remove cache dir
+ if opts.rm_cachedir:
+ ydl.cache.remove()
+
+ # Maybe do nothing
+ if (len(all_urls) < 1) and (opts.load_info_filename is None):
+ if opts.update_self or opts.rm_cachedir:
+ sys.exit()
+
+ ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+ parser.error(
+ 'You must provide at least one URL.\n'
+ 'Type youtube-dl --help to see a list of all options.')
+
+ try:
+ if opts.load_info_filename is not None:
+ retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ else:
+ retcode = ydl.download(all_urls)
+ except MaxDownloadsReached:
+ ydl.to_screen('--max-download limit reached, aborting.')
+ retcode = 101
+
+ sys.exit(retcode)
+
+
+def main(argv=None):
+ try:
+ _real_main(argv)
+ except DownloadError:
+ sys.exit(1)
+ except SameFileError:
+ sys.exit('ERROR: fixed output name but more than one file to download')
+ except KeyboardInterrupt:
+ sys.exit('\nERROR: Interrupted by user')
+
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py
new file mode 100644
index 0000000..138f5fb
--- /dev/null
+++ b/youtube_dl/__main__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Execute with
+# $ python youtube_dl/__main__.py (2.6+)
+# $ python -m youtube_dl (2.7+)
+
+import sys
+
+if __package__ is None and not hasattr(sys, 'frozen'):
+ # direct call of __main__.py
+ import os.path
+ path = os.path.realpath(os.path.abspath(__file__))
+ sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
+
+import youtube_dl
+
+if __name__ == '__main__':
+ youtube_dl.main()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
new file mode 100644
index 0000000..461bb6d
--- /dev/null
+++ b/youtube_dl/aes.py
@@ -0,0 +1,361 @@
+from __future__ import unicode_literals
+
+from math import ceil
+
+from .compat import compat_b64decode
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+
+def aes_ctr_decrypt(data, key, counter):
+ """
+ Decrypt with aes in counter mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
+ returns the next counter block
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data = []
+ for i in range(block_count):
+ counter_block = counter.next_value()
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+ decrypted_data += xor(block, cipher_counter_block)
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_decrypt(data, key, iv):
+ """
+ Decrypt with aes in CBC mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ decrypted_block = aes_decrypt(block, expanded_key)
+ decrypted_data += xor(decrypted_block, previous_cipher_block)
+ previous_cipher_block = block
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_encrypt(data, key, iv):
+ """
+ Encrypt with aes in CBC mode. Using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ remaining_length = BLOCK_SIZE_BYTES - len(block)
+ block += [remaining_length] * remaining_length
+ mixed_block = xor(block, previous_cipher_block)
+
+ encrypted_block = aes_encrypt(mixed_block, expanded_key)
+ encrypted_data += encrypted_block
+
+ previous_cipher_block = encrypted_block
+
+ return encrypted_data
+
+
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
+
+
+def aes_encrypt(data, expanded_key):
+ """
+ Encrypt one block with aes
+
+ @param {int[]} data 16-Byte state
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte cipher
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+ for i in range(1, rounds + 1):
+ data = sub_bytes(data)
+ data = shift_rows(data)
+ if i != rounds:
+ data = mix_columns(data)
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt(data, expanded_key):
+ """
+ Decrypt one block with aes
+
+ @param {int[]} data 16-Byte cipher
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte state
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ for i in range(rounds, 0, -1):
+ data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+ if i != rounds:
+ data = mix_columns_inv(data)
+ data = shift_rows_inv(data)
+ data = sub_bytes_inv(data)
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt_text(data, password, key_size_bytes):
+ """
+ Decrypt text
+ - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+ - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+ with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+ - Mode of operation is 'counter'
+
+ @param {str} data Base64 encoded string
+ @param {str,unicode} password Password (will be encoded with utf-8)
+ @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+ @returns {str} Decrypted data
+ """
+ NONCE_LENGTH_BYTES = 8
+
+ data = bytes_to_intlist(compat_b64decode(data))
+ password = bytes_to_intlist(password.encode('utf-8'))
+
+ key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
+ key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+ nonce = data[:NONCE_LENGTH_BYTES]
+ cipher = data[NONCE_LENGTH_BYTES:]
+
+ class Counter(object):
+ __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+
+ def next_value(self):
+ temp = self.__value
+ self.__value = inc(self.__value)
+ return temp
+
+ decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+ plaintext = intlist_to_bytes(decrypted_data)
+
+ return plaintext
+
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
+MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
+ (0x1, 0x2, 0x3, 0x1),
+ (0x1, 0x1, 0x2, 0x3),
+ (0x3, 0x1, 0x1, 0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
+ (0x9, 0xE, 0xB, 0xD),
+ (0xD, 0x9, 0xE, 0xB),
+ (0xB, 0xD, 0x9, 0xE))
+RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
+RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
+
+
+def sub_bytes(data):
+ return [SBOX[x] for x in data]
+
+
+def sub_bytes_inv(data):
+ return [SBOX_INV[x] for x in data]
+
+
+def rotate(data):
+ return data[1:] + [data[0]]
+
+
+def key_schedule_core(data, rcon_iteration):
+ data = rotate(data)
+ data = sub_bytes(data)
+ data[0] = data[0] ^ RCON[rcon_iteration]
+
+ return data
+
+
+def xor(data1, data2):
+ return [x ^ y for x, y in zip(data1, data2)]
+
+
+def rijndael_mul(a, b):
+ if(a == 0 or b == 0):
+ return 0
+ return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
+
+
+def mix_column(data, matrix):
+ data_mixed = []
+ for row in range(4):
+ mixed = 0
+ for column in range(4):
+ # xor is (+) and (-)
+ mixed ^= rijndael_mul(data[column], matrix[row][column])
+ data_mixed.append(mixed)
+ return data_mixed
+
+
+def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
+ data_mixed = []
+ for i in range(4):
+ column = data[i * 4: (i + 1) * 4]
+ data_mixed += mix_column(column, matrix)
+ return data_mixed
+
+
+def mix_columns_inv(data):
+ return mix_columns(data, MIX_COLUMN_MATRIX_INV)
+
+
+def shift_rows(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append(data[((column + row) & 0b11) * 4 + row])
+ return data_shifted
+
+
+def shift_rows_inv(data):
+ data_shifted = []
+ for column in range(4):
+ for row in range(4):
+ data_shifted.append(data[((column - row) & 0b11) * 4 + row])
+ return data_shifted
+
+
+def inc(data):
+ data = data[:] # copy
+ for i in range(len(data) - 1, -1, -1):
+ if data[i] == 255:
+ data[i] = 0
+ else:
+ data[i] = data[i] + 1
+ break
+ return data
+
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
new file mode 100644
index 0000000..7bdade1
--- /dev/null
+++ b/youtube_dl/cache.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import json
+import os
+import re
+import shutil
+import traceback
+
+from .compat import compat_getenv
+from .utils import (
+ expand_path,
+ write_json_file,
+)
+
+
+class Cache(object):
+ def __init__(self, ydl):
+ self._ydl = ydl
+
+ def _get_root_dir(self):
+ res = self._ydl.params.get('cachedir')
+ if res is None:
+ cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
+ res = os.path.join(cache_root, 'youtube-dl')
+ return expand_path(res)
+
+ def _get_cache_fn(self, section, key, dtype):
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
+ 'invalid section %r' % section
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
+ return os.path.join(
+ self._get_root_dir(), section, '%s.%s' % (key, dtype))
+
+ @property
+ def enabled(self):
+ return self._ydl.params.get('cachedir') is not False
+
+ def store(self, section, key, data, dtype='json'):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return
+
+ fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ os.makedirs(os.path.dirname(fn))
+ except OSError as ose:
+ if ose.errno != errno.EEXIST:
+ raise
+ write_json_file(data, fn)
+ except Exception:
+ tb = traceback.format_exc()
+ self._ydl.report_warning(
+ 'Writing cache to %r failed: %s' % (fn, tb))
+
+ def load(self, section, key, dtype='json', default=None):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return default
+
+ cache_fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ return json.load(cachef)
+ except ValueError:
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except (OSError, IOError) as oe:
+ file_size = str(oe)
+ self._ydl.report_warning(
+ 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
+ except IOError:
+ pass # No cache available
+
+ return default
+
+ def remove(self):
+ if not self.enabled:
+ self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+ return
+
+ cachedir = self._get_root_dir()
+ if not any((term in cachedir) for term in ('cache', 'tmp')):
+ raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+ self._ydl.to_screen(
+ 'Removing cache dir %s .' % cachedir, skip_eol=True)
+ if os.path.exists(cachedir):
+ self._ydl.to_screen('.', skip_eol=True)
+ shutil.rmtree(cachedir)
+ self._ydl.to_screen('.')
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
new file mode 100644
index 0000000..7b77034
--- /dev/null
+++ b/youtube_dl/compat.py
@@ -0,0 +1,3016 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import collections
+import ctypes
+import email
+import getpass
+import io
+import itertools
+import optparse
+import os
+import platform
+import re
+import shlex
+import shutil
+import socket
+import struct
+import subprocess
+import sys
+import xml.etree.ElementTree
+
+
+try:
+ import urllib.request as compat_urllib_request
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_request
+
+try:
+ import urllib.error as compat_urllib_error
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_error
+
+try:
+ import urllib.parse as compat_urllib_parse
+except ImportError: # Python 2
+ import urllib as compat_urllib_parse
+
+try:
+ from urllib.parse import urlparse as compat_urllib_parse_urlparse
+except ImportError: # Python 2
+ from urlparse import urlparse as compat_urllib_parse_urlparse
+
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
+try:
+ import urllib.response as compat_urllib_response
+except ImportError: # Python 2
+ import urllib as compat_urllib_response
+
+try:
+ import http.cookiejar as compat_cookiejar
+except ImportError: # Python 2
+ import cookielib as compat_cookiejar
+
+try:
+ import http.cookies as compat_cookies
+except ImportError: # Python 2
+ import Cookie as compat_cookies
+
+try:
+ import html.entities as compat_html_entities
+except ImportError: # Python 2
+ import htmlentitydefs as compat_html_entities
+
+try: # Python >= 3.3
+ compat_html_entities_html5 = compat_html_entities.html5
+except AttributeError:
+ # Copied from CPython 3.5.1 html/entities.py
+ compat_html_entities_html5 = {
+ 'Aacute': '\xc1',
+ 'aacute': '\xe1',
+ 'Aacute;': '\xc1',
+ 'aacute;': '\xe1',
+ 'Abreve;': '\u0102',
+ 'abreve;': '\u0103',
+ 'ac;': '\u223e',
+ 'acd;': '\u223f',
+ 'acE;': '\u223e\u0333',
+ 'Acirc': '\xc2',
+ 'acirc': '\xe2',
+ 'Acirc;': '\xc2',
+ 'acirc;': '\xe2',
+ 'acute': '\xb4',
+ 'acute;': '\xb4',
+ 'Acy;': '\u0410',
+ 'acy;': '\u0430',
+ 'AElig': '\xc6',
+ 'aelig': '\xe6',
+ 'AElig;': '\xc6',
+ 'aelig;': '\xe6',
+ 'af;': '\u2061',
+ 'Afr;': '\U0001d504',
+ 'afr;': '\U0001d51e',
+ 'Agrave': '\xc0',
+ 'agrave': '\xe0',
+ 'Agrave;': '\xc0',
+ 'agrave;': '\xe0',
+ 'alefsym;': '\u2135',
+ 'aleph;': '\u2135',
+ 'Alpha;': '\u0391',
+ 'alpha;': '\u03b1',
+ 'Amacr;': '\u0100',
+ 'amacr;': '\u0101',
+ 'amalg;': '\u2a3f',
+ 'AMP': '&',
+ 'amp': '&',
+ 'AMP;': '&',
+ 'amp;': '&',
+ 'And;': '\u2a53',
+ 'and;': '\u2227',
+ 'andand;': '\u2a55',
+ 'andd;': '\u2a5c',
+ 'andslope;': '\u2a58',
+ 'andv;': '\u2a5a',
+ 'ang;': '\u2220',
+ 'ange;': '\u29a4',
+ 'angle;': '\u2220',
+ 'angmsd;': '\u2221',
+ 'angmsdaa;': '\u29a8',
+ 'angmsdab;': '\u29a9',
+ 'angmsdac;': '\u29aa',
+ 'angmsdad;': '\u29ab',
+ 'angmsdae;': '\u29ac',
+ 'angmsdaf;': '\u29ad',
+ 'angmsdag;': '\u29ae',
+ 'angmsdah;': '\u29af',
+ 'angrt;': '\u221f',
+ 'angrtvb;': '\u22be',
+ 'angrtvbd;': '\u299d',
+ 'angsph;': '\u2222',
+ 'angst;': '\xc5',
+ 'angzarr;': '\u237c',
+ 'Aogon;': '\u0104',
+ 'aogon;': '\u0105',
+ 'Aopf;': '\U0001d538',
+ 'aopf;': '\U0001d552',
+ 'ap;': '\u2248',
+ 'apacir;': '\u2a6f',
+ 'apE;': '\u2a70',
+ 'ape;': '\u224a',
+ 'apid;': '\u224b',
+ 'apos;': "'",
+ 'ApplyFunction;': '\u2061',
+ 'approx;': '\u2248',
+ 'approxeq;': '\u224a',
+ 'Aring': '\xc5',
+ 'aring': '\xe5',
+ 'Aring;': '\xc5',
+ 'aring;': '\xe5',
+ 'Ascr;': '\U0001d49c',
+ 'ascr;': '\U0001d4b6',
+ 'Assign;': '\u2254',
+ 'ast;': '*',
+ 'asymp;': '\u2248',
+ 'asympeq;': '\u224d',
+ 'Atilde': '\xc3',
+ 'atilde': '\xe3',
+ 'Atilde;': '\xc3',
+ 'atilde;': '\xe3',
+ 'Auml': '\xc4',
+ 'auml': '\xe4',
+ 'Auml;': '\xc4',
+ 'auml;': '\xe4',
+ 'awconint;': '\u2233',
+ 'awint;': '\u2a11',
+ 'backcong;': '\u224c',
+ 'backepsilon;': '\u03f6',
+ 'backprime;': '\u2035',
+ 'backsim;': '\u223d',
+ 'backsimeq;': '\u22cd',
+ 'Backslash;': '\u2216',
+ 'Barv;': '\u2ae7',
+ 'barvee;': '\u22bd',
+ 'Barwed;': '\u2306',
+ 'barwed;': '\u2305',
+ 'barwedge;': '\u2305',
+ 'bbrk;': '\u23b5',
+ 'bbrktbrk;': '\u23b6',
+ 'bcong;': '\u224c',
+ 'Bcy;': '\u0411',
+ 'bcy;': '\u0431',
+ 'bdquo;': '\u201e',
+ 'becaus;': '\u2235',
+ 'Because;': '\u2235',
+ 'because;': '\u2235',
+ 'bemptyv;': '\u29b0',
+ 'bepsi;': '\u03f6',
+ 'bernou;': '\u212c',
+ 'Bernoullis;': '\u212c',
+ 'Beta;': '\u0392',
+ 'beta;': '\u03b2',
+ 'beth;': '\u2136',
+ 'between;': '\u226c',
+ 'Bfr;': '\U0001d505',
+ 'bfr;': '\U0001d51f',
+ 'bigcap;': '\u22c2',
+ 'bigcirc;': '\u25ef',
+ 'bigcup;': '\u22c3',
+ 'bigodot;': '\u2a00',
+ 'bigoplus;': '\u2a01',
+ 'bigotimes;': '\u2a02',
+ 'bigsqcup;': '\u2a06',
+ 'bigstar;': '\u2605',
+ 'bigtriangledown;': '\u25bd',
+ 'bigtriangleup;': '\u25b3',
+ 'biguplus;': '\u2a04',
+ 'bigvee;': '\u22c1',
+ 'bigwedge;': '\u22c0',
+ 'bkarow;': '\u290d',
+ 'blacklozenge;': '\u29eb',
+ 'blacksquare;': '\u25aa',
+ 'blacktriangle;': '\u25b4',
+ 'blacktriangledown;': '\u25be',
+ 'blacktriangleleft;': '\u25c2',
+ 'blacktriangleright;': '\u25b8',
+ 'blank;': '\u2423',
+ 'blk12;': '\u2592',
+ 'blk14;': '\u2591',
+ 'blk34;': '\u2593',
+ 'block;': '\u2588',
+ 'bne;': '=\u20e5',
+ 'bnequiv;': '\u2261\u20e5',
+ 'bNot;': '\u2aed',
+ 'bnot;': '\u2310',
+ 'Bopf;': '\U0001d539',
+ 'bopf;': '\U0001d553',
+ 'bot;': '\u22a5',
+ 'bottom;': '\u22a5',
+ 'bowtie;': '\u22c8',
+ 'boxbox;': '\u29c9',
+ 'boxDL;': '\u2557',
+ 'boxDl;': '\u2556',
+ 'boxdL;': '\u2555',
+ 'boxdl;': '\u2510',
+ 'boxDR;': '\u2554',
+ 'boxDr;': '\u2553',
+ 'boxdR;': '\u2552',
+ 'boxdr;': '\u250c',
+ 'boxH;': '\u2550',
+ 'boxh;': '\u2500',
+ 'boxHD;': '\u2566',
+ 'boxHd;': '\u2564',
+ 'boxhD;': '\u2565',
+ 'boxhd;': '\u252c',
+ 'boxHU;': '\u2569',
+ 'boxHu;': '\u2567',
+ 'boxhU;': '\u2568',
+ 'boxhu;': '\u2534',
+ 'boxminus;': '\u229f',
+ 'boxplus;': '\u229e',
+ 'boxtimes;': '\u22a0',
+ 'boxUL;': '\u255d',
+ 'boxUl;': '\u255c',
+ 'boxuL;': '\u255b',
+ 'boxul;': '\u2518',
+ 'boxUR;': '\u255a',
+ 'boxUr;': '\u2559',
+ 'boxuR;': '\u2558',
+ 'boxur;': '\u2514',
+ 'boxV;': '\u2551',
+ 'boxv;': '\u2502',
+ 'boxVH;': '\u256c',
+ 'boxVh;': '\u256b',
+ 'boxvH;': '\u256a',
+ 'boxvh;': '\u253c',
+ 'boxVL;': '\u2563',
+ 'boxVl;': '\u2562',
+ 'boxvL;': '\u2561',
+ 'boxvl;': '\u2524',
+ 'boxVR;': '\u2560',
+ 'boxVr;': '\u255f',
+ 'boxvR;': '\u255e',
+ 'boxvr;': '\u251c',
+ 'bprime;': '\u2035',
+ 'Breve;': '\u02d8',
+ 'breve;': '\u02d8',
+ 'brvbar': '\xa6',
+ 'brvbar;': '\xa6',
+ 'Bscr;': '\u212c',
+ 'bscr;': '\U0001d4b7',
+ 'bsemi;': '\u204f',
+ 'bsim;': '\u223d',
+ 'bsime;': '\u22cd',
+ 'bsol;': '\\',
+ 'bsolb;': '\u29c5',
+ 'bsolhsub;': '\u27c8',
+ 'bull;': '\u2022',
+ 'bullet;': '\u2022',
+ 'bump;': '\u224e',
+ 'bumpE;': '\u2aae',
+ 'bumpe;': '\u224f',
+ 'Bumpeq;': '\u224e',
+ 'bumpeq;': '\u224f',
+ 'Cacute;': '\u0106',
+ 'cacute;': '\u0107',
+ 'Cap;': '\u22d2',
+ 'cap;': '\u2229',
+ 'capand;': '\u2a44',
+ 'capbrcup;': '\u2a49',
+ 'capcap;': '\u2a4b',
+ 'capcup;': '\u2a47',
+ 'capdot;': '\u2a40',
+ 'CapitalDifferentialD;': '\u2145',
+ 'caps;': '\u2229\ufe00',
+ 'caret;': '\u2041',
+ 'caron;': '\u02c7',
+ 'Cayleys;': '\u212d',
+ 'ccaps;': '\u2a4d',
+ 'Ccaron;': '\u010c',
+ 'ccaron;': '\u010d',
+ 'Ccedil': '\xc7',
+ 'ccedil': '\xe7',
+ 'Ccedil;': '\xc7',
+ 'ccedil;': '\xe7',
+ 'Ccirc;': '\u0108',
+ 'ccirc;': '\u0109',
+ 'Cconint;': '\u2230',
+ 'ccups;': '\u2a4c',
+ 'ccupssm;': '\u2a50',
+ 'Cdot;': '\u010a',
+ 'cdot;': '\u010b',
+ 'cedil': '\xb8',
+ 'cedil;': '\xb8',
+ 'Cedilla;': '\xb8',
+ 'cemptyv;': '\u29b2',
+ 'cent': '\xa2',
+ 'cent;': '\xa2',
+ 'CenterDot;': '\xb7',
+ 'centerdot;': '\xb7',
+ 'Cfr;': '\u212d',
+ 'cfr;': '\U0001d520',
+ 'CHcy;': '\u0427',
+ 'chcy;': '\u0447',
+ 'check;': '\u2713',
+ 'checkmark;': '\u2713',
+ 'Chi;': '\u03a7',
+ 'chi;': '\u03c7',
+ 'cir;': '\u25cb',
+ 'circ;': '\u02c6',
+ 'circeq;': '\u2257',
+ 'circlearrowleft;': '\u21ba',
+ 'circlearrowright;': '\u21bb',
+ 'circledast;': '\u229b',
+ 'circledcirc;': '\u229a',
+ 'circleddash;': '\u229d',
+ 'CircleDot;': '\u2299',
+ 'circledR;': '\xae',
+ 'circledS;': '\u24c8',
+ 'CircleMinus;': '\u2296',
+ 'CirclePlus;': '\u2295',
+ 'CircleTimes;': '\u2297',
+ 'cirE;': '\u29c3',
+ 'cire;': '\u2257',
+ 'cirfnint;': '\u2a10',
+ 'cirmid;': '\u2aef',
+ 'cirscir;': '\u29c2',
+ 'ClockwiseContourIntegral;': '\u2232',
+ 'CloseCurlyDoubleQuote;': '\u201d',
+ 'CloseCurlyQuote;': '\u2019',
+ 'clubs;': '\u2663',
+ 'clubsuit;': '\u2663',
+ 'Colon;': '\u2237',
+ 'colon;': ':',
+ 'Colone;': '\u2a74',
+ 'colone;': '\u2254',
+ 'coloneq;': '\u2254',
+ 'comma;': ',',
+ 'commat;': '@',
+ 'comp;': '\u2201',
+ 'compfn;': '\u2218',
+ 'complement;': '\u2201',
+ 'complexes;': '\u2102',
+ 'cong;': '\u2245',
+ 'congdot;': '\u2a6d',
+ 'Congruent;': '\u2261',
+ 'Conint;': '\u222f',
+ 'conint;': '\u222e',
+ 'ContourIntegral;': '\u222e',
+ 'Copf;': '\u2102',
+ 'copf;': '\U0001d554',
+ 'coprod;': '\u2210',
+ 'Coproduct;': '\u2210',
+ 'COPY': '\xa9',
+ 'copy': '\xa9',
+ 'COPY;': '\xa9',
+ 'copy;': '\xa9',
+ 'copysr;': '\u2117',
+ 'CounterClockwiseContourIntegral;': '\u2233',
+ 'crarr;': '\u21b5',
+ 'Cross;': '\u2a2f',
+ 'cross;': '\u2717',
+ 'Cscr;': '\U0001d49e',
+ 'cscr;': '\U0001d4b8',
+ 'csub;': '\u2acf',
+ 'csube;': '\u2ad1',
+ 'csup;': '\u2ad0',
+ 'csupe;': '\u2ad2',
+ 'ctdot;': '\u22ef',
+ 'cudarrl;': '\u2938',
+ 'cudarrr;': '\u2935',
+ 'cuepr;': '\u22de',
+ 'cuesc;': '\u22df',
+ 'cularr;': '\u21b6',
+ 'cularrp;': '\u293d',
+ 'Cup;': '\u22d3',
+ 'cup;': '\u222a',
+ 'cupbrcap;': '\u2a48',
+ 'CupCap;': '\u224d',
+ 'cupcap;': '\u2a46',
+ 'cupcup;': '\u2a4a',
+ 'cupdot;': '\u228d',
+ 'cupor;': '\u2a45',
+ 'cups;': '\u222a\ufe00',
+ 'curarr;': '\u21b7',
+ 'curarrm;': '\u293c',
+ 'curlyeqprec;': '\u22de',
+ 'curlyeqsucc;': '\u22df',
+ 'curlyvee;': '\u22ce',
+ 'curlywedge;': '\u22cf',
+ 'curren': '\xa4',
+ 'curren;': '\xa4',
+ 'curvearrowleft;': '\u21b6',
+ 'curvearrowright;': '\u21b7',
+ 'cuvee;': '\u22ce',
+ 'cuwed;': '\u22cf',
+ 'cwconint;': '\u2232',
+ 'cwint;': '\u2231',
+ 'cylcty;': '\u232d',
+ 'Dagger;': '\u2021',
+ 'dagger;': '\u2020',
+ 'daleth;': '\u2138',
+ 'Darr;': '\u21a1',
+ 'dArr;': '\u21d3',
+ 'darr;': '\u2193',
+ 'dash;': '\u2010',
+ 'Dashv;': '\u2ae4',
+ 'dashv;': '\u22a3',
+ 'dbkarow;': '\u290f',
+ 'dblac;': '\u02dd',
+ 'Dcaron;': '\u010e',
+ 'dcaron;': '\u010f',
+ 'Dcy;': '\u0414',
+ 'dcy;': '\u0434',
+ 'DD;': '\u2145',
+ 'dd;': '\u2146',
+ 'ddagger;': '\u2021',
+ 'ddarr;': '\u21ca',
+ 'DDotrahd;': '\u2911',
+ 'ddotseq;': '\u2a77',
+ 'deg': '\xb0',
+ 'deg;': '\xb0',
+ 'Del;': '\u2207',
+ 'Delta;': '\u0394',
+ 'delta;': '\u03b4',
+ 'demptyv;': '\u29b1',
+ 'dfisht;': '\u297f',
+ 'Dfr;': '\U0001d507',
+ 'dfr;': '\U0001d521',
+ 'dHar;': '\u2965',
+ 'dharl;': '\u21c3',
+ 'dharr;': '\u21c2',
+ 'DiacriticalAcute;': '\xb4',
+ 'DiacriticalDot;': '\u02d9',
+ 'DiacriticalDoubleAcute;': '\u02dd',
+ 'DiacriticalGrave;': '`',
+ 'DiacriticalTilde;': '\u02dc',
+ 'diam;': '\u22c4',
+ 'Diamond;': '\u22c4',
+ 'diamond;': '\u22c4',
+ 'diamondsuit;': '\u2666',
+ 'diams;': '\u2666',
+ 'die;': '\xa8',
+ 'DifferentialD;': '\u2146',
+ 'digamma;': '\u03dd',
+ 'disin;': '\u22f2',
+ 'div;': '\xf7',
+ 'divide': '\xf7',
+ 'divide;': '\xf7',
+ 'divideontimes;': '\u22c7',
+ 'divonx;': '\u22c7',
+ 'DJcy;': '\u0402',
+ 'djcy;': '\u0452',
+ 'dlcorn;': '\u231e',
+ 'dlcrop;': '\u230d',
+ 'dollar;': '$',
+ 'Dopf;': '\U0001d53b',
+ 'dopf;': '\U0001d555',
+ 'Dot;': '\xa8',
+ 'dot;': '\u02d9',
+ 'DotDot;': '\u20dc',
+ 'doteq;': '\u2250',
+ 'doteqdot;': '\u2251',
+ 'DotEqual;': '\u2250',
+ 'dotminus;': '\u2238',
+ 'dotplus;': '\u2214',
+ 'dotsquare;': '\u22a1',
+ 'doublebarwedge;': '\u2306',
+ 'DoubleContourIntegral;': '\u222f',
+ 'DoubleDot;': '\xa8',
+ 'DoubleDownArrow;': '\u21d3',
+ 'DoubleLeftArrow;': '\u21d0',
+ 'DoubleLeftRightArrow;': '\u21d4',
+ 'DoubleLeftTee;': '\u2ae4',
+ 'DoubleLongLeftArrow;': '\u27f8',
+ 'DoubleLongLeftRightArrow;': '\u27fa',
+ 'DoubleLongRightArrow;': '\u27f9',
+ 'DoubleRightArrow;': '\u21d2',
+ 'DoubleRightTee;': '\u22a8',
+ 'DoubleUpArrow;': '\u21d1',
+ 'DoubleUpDownArrow;': '\u21d5',
+ 'DoubleVerticalBar;': '\u2225',
+ 'DownArrow;': '\u2193',
+ 'Downarrow;': '\u21d3',
+ 'downarrow;': '\u2193',
+ 'DownArrowBar;': '\u2913',
+ 'DownArrowUpArrow;': '\u21f5',
+ 'DownBreve;': '\u0311',
+ 'downdownarrows;': '\u21ca',
+ 'downharpoonleft;': '\u21c3',
+ 'downharpoonright;': '\u21c2',
+ 'DownLeftRightVector;': '\u2950',
+ 'DownLeftTeeVector;': '\u295e',
+ 'DownLeftVector;': '\u21bd',
+ 'DownLeftVectorBar;': '\u2956',
+ 'DownRightTeeVector;': '\u295f',
+ 'DownRightVector;': '\u21c1',
+ 'DownRightVectorBar;': '\u2957',
+ 'DownTee;': '\u22a4',
+ 'DownTeeArrow;': '\u21a7',
+ 'drbkarow;': '\u2910',
+ 'drcorn;': '\u231f',
+ 'drcrop;': '\u230c',
+ 'Dscr;': '\U0001d49f',
+ 'dscr;': '\U0001d4b9',
+ 'DScy;': '\u0405',
+ 'dscy;': '\u0455',
+ 'dsol;': '\u29f6',
+ 'Dstrok;': '\u0110',
+ 'dstrok;': '\u0111',
+ 'dtdot;': '\u22f1',
+ 'dtri;': '\u25bf',
+ 'dtrif;': '\u25be',
+ 'duarr;': '\u21f5',
+ 'duhar;': '\u296f',
+ 'dwangle;': '\u29a6',
+ 'DZcy;': '\u040f',
+ 'dzcy;': '\u045f',
+ 'dzigrarr;': '\u27ff',
+ 'Eacute': '\xc9',
+ 'eacute': '\xe9',
+ 'Eacute;': '\xc9',
+ 'eacute;': '\xe9',
+ 'easter;': '\u2a6e',
+ 'Ecaron;': '\u011a',
+ 'ecaron;': '\u011b',
+ 'ecir;': '\u2256',
+ 'Ecirc': '\xca',
+ 'ecirc': '\xea',
+ 'Ecirc;': '\xca',
+ 'ecirc;': '\xea',
+ 'ecolon;': '\u2255',
+ 'Ecy;': '\u042d',
+ 'ecy;': '\u044d',
+ 'eDDot;': '\u2a77',
+ 'Edot;': '\u0116',
+ 'eDot;': '\u2251',
+ 'edot;': '\u0117',
+ 'ee;': '\u2147',
+ 'efDot;': '\u2252',
+ 'Efr;': '\U0001d508',
+ 'efr;': '\U0001d522',
+ 'eg;': '\u2a9a',
+ 'Egrave': '\xc8',
+ 'egrave': '\xe8',
+ 'Egrave;': '\xc8',
+ 'egrave;': '\xe8',
+ 'egs;': '\u2a96',
+ 'egsdot;': '\u2a98',
+ 'el;': '\u2a99',
+ 'Element;': '\u2208',
+ 'elinters;': '\u23e7',
+ 'ell;': '\u2113',
+ 'els;': '\u2a95',
+ 'elsdot;': '\u2a97',
+ 'Emacr;': '\u0112',
+ 'emacr;': '\u0113',
+ 'empty;': '\u2205',
+ 'emptyset;': '\u2205',
+ 'EmptySmallSquare;': '\u25fb',
+ 'emptyv;': '\u2205',
+ 'EmptyVerySmallSquare;': '\u25ab',
+ 'emsp13;': '\u2004',
+ 'emsp14;': '\u2005',
+ 'emsp;': '\u2003',
+ 'ENG;': '\u014a',
+ 'eng;': '\u014b',
+ 'ensp;': '\u2002',
+ 'Eogon;': '\u0118',
+ 'eogon;': '\u0119',
+ 'Eopf;': '\U0001d53c',
+ 'eopf;': '\U0001d556',
+ 'epar;': '\u22d5',
+ 'eparsl;': '\u29e3',
+ 'eplus;': '\u2a71',
+ 'epsi;': '\u03b5',
+ 'Epsilon;': '\u0395',
+ 'epsilon;': '\u03b5',
+ 'epsiv;': '\u03f5',
+ 'eqcirc;': '\u2256',
+ 'eqcolon;': '\u2255',
+ 'eqsim;': '\u2242',
+ 'eqslantgtr;': '\u2a96',
+ 'eqslantless;': '\u2a95',
+ 'Equal;': '\u2a75',
+ 'equals;': '=',
+ 'EqualTilde;': '\u2242',
+ 'equest;': '\u225f',
+ 'Equilibrium;': '\u21cc',
+ 'equiv;': '\u2261',
+ 'equivDD;': '\u2a78',
+ 'eqvparsl;': '\u29e5',
+ 'erarr;': '\u2971',
+ 'erDot;': '\u2253',
+ 'Escr;': '\u2130',
+ 'escr;': '\u212f',
+ 'esdot;': '\u2250',
+ 'Esim;': '\u2a73',
+ 'esim;': '\u2242',
+ 'Eta;': '\u0397',
+ 'eta;': '\u03b7',
+ 'ETH': '\xd0',
+ 'eth': '\xf0',
+ 'ETH;': '\xd0',
+ 'eth;': '\xf0',
+ 'Euml': '\xcb',
+ 'euml': '\xeb',
+ 'Euml;': '\xcb',
+ 'euml;': '\xeb',
+ 'euro;': '\u20ac',
+ 'excl;': '!',
+ 'exist;': '\u2203',
+ 'Exists;': '\u2203',
+ 'expectation;': '\u2130',
+ 'ExponentialE;': '\u2147',
+ 'exponentiale;': '\u2147',
+ 'fallingdotseq;': '\u2252',
+ 'Fcy;': '\u0424',
+ 'fcy;': '\u0444',
+ 'female;': '\u2640',
+ 'ffilig;': '\ufb03',
+ 'fflig;': '\ufb00',
+ 'ffllig;': '\ufb04',
+ 'Ffr;': '\U0001d509',
+ 'ffr;': '\U0001d523',
+ 'filig;': '\ufb01',
+ 'FilledSmallSquare;': '\u25fc',
+ 'FilledVerySmallSquare;': '\u25aa',
+ 'fjlig;': 'fj',
+ 'flat;': '\u266d',
+ 'fllig;': '\ufb02',
+ 'fltns;': '\u25b1',
+ 'fnof;': '\u0192',
+ 'Fopf;': '\U0001d53d',
+ 'fopf;': '\U0001d557',
+ 'ForAll;': '\u2200',
+ 'forall;': '\u2200',
+ 'fork;': '\u22d4',
+ 'forkv;': '\u2ad9',
+ 'Fouriertrf;': '\u2131',
+ 'fpartint;': '\u2a0d',
+ 'frac12': '\xbd',
+ 'frac12;': '\xbd',
+ 'frac13;': '\u2153',
+ 'frac14': '\xbc',
+ 'frac14;': '\xbc',
+ 'frac15;': '\u2155',
+ 'frac16;': '\u2159',
+ 'frac18;': '\u215b',
+ 'frac23;': '\u2154',
+ 'frac25;': '\u2156',
+ 'frac34': '\xbe',
+ 'frac34;': '\xbe',
+ 'frac35;': '\u2157',
+ 'frac38;': '\u215c',
+ 'frac45;': '\u2158',
+ 'frac56;': '\u215a',
+ 'frac58;': '\u215d',
+ 'frac78;': '\u215e',
+ 'frasl;': '\u2044',
+ 'frown;': '\u2322',
+ 'Fscr;': '\u2131',
+ 'fscr;': '\U0001d4bb',
+ 'gacute;': '\u01f5',
+ 'Gamma;': '\u0393',
+ 'gamma;': '\u03b3',
+ 'Gammad;': '\u03dc',
+ 'gammad;': '\u03dd',
+ 'gap;': '\u2a86',
+ 'Gbreve;': '\u011e',
+ 'gbreve;': '\u011f',
+ 'Gcedil;': '\u0122',
+ 'Gcirc;': '\u011c',
+ 'gcirc;': '\u011d',
+ 'Gcy;': '\u0413',
+ 'gcy;': '\u0433',
+ 'Gdot;': '\u0120',
+ 'gdot;': '\u0121',
+ 'gE;': '\u2267',
+ 'ge;': '\u2265',
+ 'gEl;': '\u2a8c',
+ 'gel;': '\u22db',
+ 'geq;': '\u2265',
+ 'geqq;': '\u2267',
+ 'geqslant;': '\u2a7e',
+ 'ges;': '\u2a7e',
+ 'gescc;': '\u2aa9',
+ 'gesdot;': '\u2a80',
+ 'gesdoto;': '\u2a82',
+ 'gesdotol;': '\u2a84',
+ 'gesl;': '\u22db\ufe00',
+ 'gesles;': '\u2a94',
+ 'Gfr;': '\U0001d50a',
+ 'gfr;': '\U0001d524',
+ 'Gg;': '\u22d9',
+ 'gg;': '\u226b',
+ 'ggg;': '\u22d9',
+ 'gimel;': '\u2137',
+ 'GJcy;': '\u0403',
+ 'gjcy;': '\u0453',
+ 'gl;': '\u2277',
+ 'gla;': '\u2aa5',
+ 'glE;': '\u2a92',
+ 'glj;': '\u2aa4',
+ 'gnap;': '\u2a8a',
+ 'gnapprox;': '\u2a8a',
+ 'gnE;': '\u2269',
+ 'gne;': '\u2a88',
+ 'gneq;': '\u2a88',
+ 'gneqq;': '\u2269',
+ 'gnsim;': '\u22e7',
+ 'Gopf;': '\U0001d53e',
+ 'gopf;': '\U0001d558',
+ 'grave;': '`',
+ 'GreaterEqual;': '\u2265',
+ 'GreaterEqualLess;': '\u22db',
+ 'GreaterFullEqual;': '\u2267',
+ 'GreaterGreater;': '\u2aa2',
+ 'GreaterLess;': '\u2277',
+ 'GreaterSlantEqual;': '\u2a7e',
+ 'GreaterTilde;': '\u2273',
+ 'Gscr;': '\U0001d4a2',
+ 'gscr;': '\u210a',
+ 'gsim;': '\u2273',
+ 'gsime;': '\u2a8e',
+ 'gsiml;': '\u2a90',
+ 'GT': '>',
+ 'gt': '>',
+ 'GT;': '>',
+ 'Gt;': '\u226b',
+ 'gt;': '>',
+ 'gtcc;': '\u2aa7',
+ 'gtcir;': '\u2a7a',
+ 'gtdot;': '\u22d7',
+ 'gtlPar;': '\u2995',
+ 'gtquest;': '\u2a7c',
+ 'gtrapprox;': '\u2a86',
+ 'gtrarr;': '\u2978',
+ 'gtrdot;': '\u22d7',
+ 'gtreqless;': '\u22db',
+ 'gtreqqless;': '\u2a8c',
+ 'gtrless;': '\u2277',
+ 'gtrsim;': '\u2273',
+ 'gvertneqq;': '\u2269\ufe00',
+ 'gvnE;': '\u2269\ufe00',
+ 'Hacek;': '\u02c7',
+ 'hairsp;': '\u200a',
+ 'half;': '\xbd',
+ 'hamilt;': '\u210b',
+ 'HARDcy;': '\u042a',
+ 'hardcy;': '\u044a',
+ 'hArr;': '\u21d4',
+ 'harr;': '\u2194',
+ 'harrcir;': '\u2948',
+ 'harrw;': '\u21ad',
+ 'Hat;': '^',
+ 'hbar;': '\u210f',
+ 'Hcirc;': '\u0124',
+ 'hcirc;': '\u0125',
+ 'hearts;': '\u2665',
+ 'heartsuit;': '\u2665',
+ 'hellip;': '\u2026',
+ 'hercon;': '\u22b9',
+ 'Hfr;': '\u210c',
+ 'hfr;': '\U0001d525',
+ 'HilbertSpace;': '\u210b',
+ 'hksearow;': '\u2925',
+ 'hkswarow;': '\u2926',
+ 'hoarr;': '\u21ff',
+ 'homtht;': '\u223b',
+ 'hookleftarrow;': '\u21a9',
+ 'hookrightarrow;': '\u21aa',
+ 'Hopf;': '\u210d',
+ 'hopf;': '\U0001d559',
+ 'horbar;': '\u2015',
+ 'HorizontalLine;': '\u2500',
+ 'Hscr;': '\u210b',
+ 'hscr;': '\U0001d4bd',
+ 'hslash;': '\u210f',
+ 'Hstrok;': '\u0126',
+ 'hstrok;': '\u0127',
+ 'HumpDownHump;': '\u224e',
+ 'HumpEqual;': '\u224f',
+ 'hybull;': '\u2043',
+ 'hyphen;': '\u2010',
+ 'Iacute': '\xcd',
+ 'iacute': '\xed',
+ 'Iacute;': '\xcd',
+ 'iacute;': '\xed',
+ 'ic;': '\u2063',
+ 'Icirc': '\xce',
+ 'icirc': '\xee',
+ 'Icirc;': '\xce',
+ 'icirc;': '\xee',
+ 'Icy;': '\u0418',
+ 'icy;': '\u0438',
+ 'Idot;': '\u0130',
+ 'IEcy;': '\u0415',
+ 'iecy;': '\u0435',
+ 'iexcl': '\xa1',
+ 'iexcl;': '\xa1',
+ 'iff;': '\u21d4',
+ 'Ifr;': '\u2111',
+ 'ifr;': '\U0001d526',
+ 'Igrave': '\xcc',
+ 'igrave': '\xec',
+ 'Igrave;': '\xcc',
+ 'igrave;': '\xec',
+ 'ii;': '\u2148',
+ 'iiiint;': '\u2a0c',
+ 'iiint;': '\u222d',
+ 'iinfin;': '\u29dc',
+ 'iiota;': '\u2129',
+ 'IJlig;': '\u0132',
+ 'ijlig;': '\u0133',
+ 'Im;': '\u2111',
+ 'Imacr;': '\u012a',
+ 'imacr;': '\u012b',
+ 'image;': '\u2111',
+ 'ImaginaryI;': '\u2148',
+ 'imagline;': '\u2110',
+ 'imagpart;': '\u2111',
+ 'imath;': '\u0131',
+ 'imof;': '\u22b7',
+ 'imped;': '\u01b5',
+ 'Implies;': '\u21d2',
+ 'in;': '\u2208',
+ 'incare;': '\u2105',
+ 'infin;': '\u221e',
+ 'infintie;': '\u29dd',
+ 'inodot;': '\u0131',
+ 'Int;': '\u222c',
+ 'int;': '\u222b',
+ 'intcal;': '\u22ba',
+ 'integers;': '\u2124',
+ 'Integral;': '\u222b',
+ 'intercal;': '\u22ba',
+ 'Intersection;': '\u22c2',
+ 'intlarhk;': '\u2a17',
+ 'intprod;': '\u2a3c',
+ 'InvisibleComma;': '\u2063',
+ 'InvisibleTimes;': '\u2062',
+ 'IOcy;': '\u0401',
+ 'iocy;': '\u0451',
+ 'Iogon;': '\u012e',
+ 'iogon;': '\u012f',
+ 'Iopf;': '\U0001d540',
+ 'iopf;': '\U0001d55a',
+ 'Iota;': '\u0399',
+ 'iota;': '\u03b9',
+ 'iprod;': '\u2a3c',
+ 'iquest': '\xbf',
+ 'iquest;': '\xbf',
+ 'Iscr;': '\u2110',
+ 'iscr;': '\U0001d4be',
+ 'isin;': '\u2208',
+ 'isindot;': '\u22f5',
+ 'isinE;': '\u22f9',
+ 'isins;': '\u22f4',
+ 'isinsv;': '\u22f3',
+ 'isinv;': '\u2208',
+ 'it;': '\u2062',
+ 'Itilde;': '\u0128',
+ 'itilde;': '\u0129',
+ 'Iukcy;': '\u0406',
+ 'iukcy;': '\u0456',
+ 'Iuml': '\xcf',
+ 'iuml': '\xef',
+ 'Iuml;': '\xcf',
+ 'iuml;': '\xef',
+ 'Jcirc;': '\u0134',
+ 'jcirc;': '\u0135',
+ 'Jcy;': '\u0419',
+ 'jcy;': '\u0439',
+ 'Jfr;': '\U0001d50d',
+ 'jfr;': '\U0001d527',
+ 'jmath;': '\u0237',
+ 'Jopf;': '\U0001d541',
+ 'jopf;': '\U0001d55b',
+ 'Jscr;': '\U0001d4a5',
+ 'jscr;': '\U0001d4bf',
+ 'Jsercy;': '\u0408',
+ 'jsercy;': '\u0458',
+ 'Jukcy;': '\u0404',
+ 'jukcy;': '\u0454',
+ 'Kappa;': '\u039a',
+ 'kappa;': '\u03ba',
+ 'kappav;': '\u03f0',
+ 'Kcedil;': '\u0136',
+ 'kcedil;': '\u0137',
+ 'Kcy;': '\u041a',
+ 'kcy;': '\u043a',
+ 'Kfr;': '\U0001d50e',
+ 'kfr;': '\U0001d528',
+ 'kgreen;': '\u0138',
+ 'KHcy;': '\u0425',
+ 'khcy;': '\u0445',
+ 'KJcy;': '\u040c',
+ 'kjcy;': '\u045c',
+ 'Kopf;': '\U0001d542',
+ 'kopf;': '\U0001d55c',
+ 'Kscr;': '\U0001d4a6',
+ 'kscr;': '\U0001d4c0',
+ 'lAarr;': '\u21da',
+ 'Lacute;': '\u0139',
+ 'lacute;': '\u013a',
+ 'laemptyv;': '\u29b4',
+ 'lagran;': '\u2112',
+ 'Lambda;': '\u039b',
+ 'lambda;': '\u03bb',
+ 'Lang;': '\u27ea',
+ 'lang;': '\u27e8',
+ 'langd;': '\u2991',
+ 'langle;': '\u27e8',
+ 'lap;': '\u2a85',
+ 'Laplacetrf;': '\u2112',
+ 'laquo': '\xab',
+ 'laquo;': '\xab',
+ 'Larr;': '\u219e',
+ 'lArr;': '\u21d0',
+ 'larr;': '\u2190',
+ 'larrb;': '\u21e4',
+ 'larrbfs;': '\u291f',
+ 'larrfs;': '\u291d',
+ 'larrhk;': '\u21a9',
+ 'larrlp;': '\u21ab',
+ 'larrpl;': '\u2939',
+ 'larrsim;': '\u2973',
+ 'larrtl;': '\u21a2',
+ 'lat;': '\u2aab',
+ 'lAtail;': '\u291b',
+ 'latail;': '\u2919',
+ 'late;': '\u2aad',
+ 'lates;': '\u2aad\ufe00',
+ 'lBarr;': '\u290e',
+ 'lbarr;': '\u290c',
+ 'lbbrk;': '\u2772',
+ 'lbrace;': '{',
+ 'lbrack;': '[',
+ 'lbrke;': '\u298b',
+ 'lbrksld;': '\u298f',
+ 'lbrkslu;': '\u298d',
+ 'Lcaron;': '\u013d',
+ 'lcaron;': '\u013e',
+ 'Lcedil;': '\u013b',
+ 'lcedil;': '\u013c',
+ 'lceil;': '\u2308',
+ 'lcub;': '{',
+ 'Lcy;': '\u041b',
+ 'lcy;': '\u043b',
+ 'ldca;': '\u2936',
+ 'ldquo;': '\u201c',
+ 'ldquor;': '\u201e',
+ 'ldrdhar;': '\u2967',
+ 'ldrushar;': '\u294b',
+ 'ldsh;': '\u21b2',
+ 'lE;': '\u2266',
+ 'le;': '\u2264',
+ 'LeftAngleBracket;': '\u27e8',
+ 'LeftArrow;': '\u2190',
+ 'Leftarrow;': '\u21d0',
+ 'leftarrow;': '\u2190',
+ 'LeftArrowBar;': '\u21e4',
+ 'LeftArrowRightArrow;': '\u21c6',
+ 'leftarrowtail;': '\u21a2',
+ 'LeftCeiling;': '\u2308',
+ 'LeftDoubleBracket;': '\u27e6',
+ 'LeftDownTeeVector;': '\u2961',
+ 'LeftDownVector;': '\u21c3',
+ 'LeftDownVectorBar;': '\u2959',
+ 'LeftFloor;': '\u230a',
+ 'leftharpoondown;': '\u21bd',
+ 'leftharpoonup;': '\u21bc',
+ 'leftleftarrows;': '\u21c7',
+ 'LeftRightArrow;': '\u2194',
+ 'Leftrightarrow;': '\u21d4',
+ 'leftrightarrow;': '\u2194',
+ 'leftrightarrows;': '\u21c6',
+ 'leftrightharpoons;': '\u21cb',
+ 'leftrightsquigarrow;': '\u21ad',
+ 'LeftRightVector;': '\u294e',
+ 'LeftTee;': '\u22a3',
+ 'LeftTeeArrow;': '\u21a4',
+ 'LeftTeeVector;': '\u295a',
+ 'leftthreetimes;': '\u22cb',
+ 'LeftTriangle;': '\u22b2',
+ 'LeftTriangleBar;': '\u29cf',
+ 'LeftTriangleEqual;': '\u22b4',
+ 'LeftUpDownVector;': '\u2951',
+ 'LeftUpTeeVector;': '\u2960',
+ 'LeftUpVector;': '\u21bf',
+ 'LeftUpVectorBar;': '\u2958',
+ 'LeftVector;': '\u21bc',
+ 'LeftVectorBar;': '\u2952',
+ 'lEg;': '\u2a8b',
+ 'leg;': '\u22da',
+ 'leq;': '\u2264',
+ 'leqq;': '\u2266',
+ 'leqslant;': '\u2a7d',
+ 'les;': '\u2a7d',
+ 'lescc;': '\u2aa8',
+ 'lesdot;': '\u2a7f',
+ 'lesdoto;': '\u2a81',
+ 'lesdotor;': '\u2a83',
+ 'lesg;': '\u22da\ufe00',
+ 'lesges;': '\u2a93',
+ 'lessapprox;': '\u2a85',
+ 'lessdot;': '\u22d6',
+ 'lesseqgtr;': '\u22da',
+ 'lesseqqgtr;': '\u2a8b',
+ 'LessEqualGreater;': '\u22da',
+ 'LessFullEqual;': '\u2266',
+ 'LessGreater;': '\u2276',
+ 'lessgtr;': '\u2276',
+ 'LessLess;': '\u2aa1',
+ 'lesssim;': '\u2272',
+ 'LessSlantEqual;': '\u2a7d',
+ 'LessTilde;': '\u2272',
+ 'lfisht;': '\u297c',
+ 'lfloor;': '\u230a',
+ 'Lfr;': '\U0001d50f',
+ 'lfr;': '\U0001d529',
+ 'lg;': '\u2276',
+ 'lgE;': '\u2a91',
+ 'lHar;': '\u2962',
+ 'lhard;': '\u21bd',
+ 'lharu;': '\u21bc',
+ 'lharul;': '\u296a',
+ 'lhblk;': '\u2584',
+ 'LJcy;': '\u0409',
+ 'ljcy;': '\u0459',
+ 'Ll;': '\u22d8',
+ 'll;': '\u226a',
+ 'llarr;': '\u21c7',
+ 'llcorner;': '\u231e',
+ 'Lleftarrow;': '\u21da',
+ 'llhard;': '\u296b',
+ 'lltri;': '\u25fa',
+ 'Lmidot;': '\u013f',
+ 'lmidot;': '\u0140',
+ 'lmoust;': '\u23b0',
+ 'lmoustache;': '\u23b0',
+ 'lnap;': '\u2a89',
+ 'lnapprox;': '\u2a89',
+ 'lnE;': '\u2268',
+ 'lne;': '\u2a87',
+ 'lneq;': '\u2a87',
+ 'lneqq;': '\u2268',
+ 'lnsim;': '\u22e6',
+ 'loang;': '\u27ec',
+ 'loarr;': '\u21fd',
+ 'lobrk;': '\u27e6',
+ 'LongLeftArrow;': '\u27f5',
+ 'Longleftarrow;': '\u27f8',
+ 'longleftarrow;': '\u27f5',
+ 'LongLeftRightArrow;': '\u27f7',
+ 'Longleftrightarrow;': '\u27fa',
+ 'longleftrightarrow;': '\u27f7',
+ 'longmapsto;': '\u27fc',
+ 'LongRightArrow;': '\u27f6',
+ 'Longrightarrow;': '\u27f9',
+ 'longrightarrow;': '\u27f6',
+ 'looparrowleft;': '\u21ab',
+ 'looparrowright;': '\u21ac',
+ 'lopar;': '\u2985',
+ 'Lopf;': '\U0001d543',
+ 'lopf;': '\U0001d55d',
+ 'loplus;': '\u2a2d',
+ 'lotimes;': '\u2a34',
+ 'lowast;': '\u2217',
+ 'lowbar;': '_',
+ 'LowerLeftArrow;': '\u2199',
+ 'LowerRightArrow;': '\u2198',
+ 'loz;': '\u25ca',
+ 'lozenge;': '\u25ca',
+ 'lozf;': '\u29eb',
+ 'lpar;': '(',
+ 'lparlt;': '\u2993',
+ 'lrarr;': '\u21c6',
+ 'lrcorner;': '\u231f',
+ 'lrhar;': '\u21cb',
+ 'lrhard;': '\u296d',
+ 'lrm;': '\u200e',
+ 'lrtri;': '\u22bf',
+ 'lsaquo;': '\u2039',
+ 'Lscr;': '\u2112',
+ 'lscr;': '\U0001d4c1',
+ 'Lsh;': '\u21b0',
+ 'lsh;': '\u21b0',
+ 'lsim;': '\u2272',
+ 'lsime;': '\u2a8d',
+ 'lsimg;': '\u2a8f',
+ 'lsqb;': '[',
+ 'lsquo;': '\u2018',
+ 'lsquor;': '\u201a',
+ 'Lstrok;': '\u0141',
+ 'lstrok;': '\u0142',
+ 'LT': '<',
+ 'lt': '<',
+ 'LT;': '<',
+ 'Lt;': '\u226a',
+ 'lt;': '<',
+ 'ltcc;': '\u2aa6',
+ 'ltcir;': '\u2a79',
+ 'ltdot;': '\u22d6',
+ 'lthree;': '\u22cb',
+ 'ltimes;': '\u22c9',
+ 'ltlarr;': '\u2976',
+ 'ltquest;': '\u2a7b',
+ 'ltri;': '\u25c3',
+ 'ltrie;': '\u22b4',
+ 'ltrif;': '\u25c2',
+ 'ltrPar;': '\u2996',
+ 'lurdshar;': '\u294a',
+ 'luruhar;': '\u2966',
+ 'lvertneqq;': '\u2268\ufe00',
+ 'lvnE;': '\u2268\ufe00',
+ 'macr': '\xaf',
+ 'macr;': '\xaf',
+ 'male;': '\u2642',
+ 'malt;': '\u2720',
+ 'maltese;': '\u2720',
+ 'Map;': '\u2905',
+ 'map;': '\u21a6',
+ 'mapsto;': '\u21a6',
+ 'mapstodown;': '\u21a7',
+ 'mapstoleft;': '\u21a4',
+ 'mapstoup;': '\u21a5',
+ 'marker;': '\u25ae',
+ 'mcomma;': '\u2a29',
+ 'Mcy;': '\u041c',
+ 'mcy;': '\u043c',
+ 'mdash;': '\u2014',
+ 'mDDot;': '\u223a',
+ 'measuredangle;': '\u2221',
+ 'MediumSpace;': '\u205f',
+ 'Mellintrf;': '\u2133',
+ 'Mfr;': '\U0001d510',
+ 'mfr;': '\U0001d52a',
+ 'mho;': '\u2127',
+ 'micro': '\xb5',
+ 'micro;': '\xb5',
+ 'mid;': '\u2223',
+ 'midast;': '*',
+ 'midcir;': '\u2af0',
+ 'middot': '\xb7',
+ 'middot;': '\xb7',
+ 'minus;': '\u2212',
+ 'minusb;': '\u229f',
+ 'minusd;': '\u2238',
+ 'minusdu;': '\u2a2a',
+ 'MinusPlus;': '\u2213',
+ 'mlcp;': '\u2adb',
+ 'mldr;': '\u2026',
+ 'mnplus;': '\u2213',
+ 'models;': '\u22a7',
+ 'Mopf;': '\U0001d544',
+ 'mopf;': '\U0001d55e',
+ 'mp;': '\u2213',
+ 'Mscr;': '\u2133',
+ 'mscr;': '\U0001d4c2',
+ 'mstpos;': '\u223e',
+ 'Mu;': '\u039c',
+ 'mu;': '\u03bc',
+ 'multimap;': '\u22b8',
+ 'mumap;': '\u22b8',
+ 'nabla;': '\u2207',
+ 'Nacute;': '\u0143',
+ 'nacute;': '\u0144',
+ 'nang;': '\u2220\u20d2',
+ 'nap;': '\u2249',
+ 'napE;': '\u2a70\u0338',
+ 'napid;': '\u224b\u0338',
+ 'napos;': '\u0149',
+ 'napprox;': '\u2249',
+ 'natur;': '\u266e',
+ 'natural;': '\u266e',
+ 'naturals;': '\u2115',
+ 'nbsp': '\xa0',
+ 'nbsp;': '\xa0',
+ 'nbump;': '\u224e\u0338',
+ 'nbumpe;': '\u224f\u0338',
+ 'ncap;': '\u2a43',
+ 'Ncaron;': '\u0147',
+ 'ncaron;': '\u0148',
+ 'Ncedil;': '\u0145',
+ 'ncedil;': '\u0146',
+ 'ncong;': '\u2247',
+ 'ncongdot;': '\u2a6d\u0338',
+ 'ncup;': '\u2a42',
+ 'Ncy;': '\u041d',
+ 'ncy;': '\u043d',
+ 'ndash;': '\u2013',
+ 'ne;': '\u2260',
+ 'nearhk;': '\u2924',
+ 'neArr;': '\u21d7',
+ 'nearr;': '\u2197',
+ 'nearrow;': '\u2197',
+ 'nedot;': '\u2250\u0338',
+ 'NegativeMediumSpace;': '\u200b',
+ 'NegativeThickSpace;': '\u200b',
+ 'NegativeThinSpace;': '\u200b',
+ 'NegativeVeryThinSpace;': '\u200b',
+ 'nequiv;': '\u2262',
+ 'nesear;': '\u2928',
+ 'nesim;': '\u2242\u0338',
+ 'NestedGreaterGreater;': '\u226b',
+ 'NestedLessLess;': '\u226a',
+ 'NewLine;': '\n',
+ 'nexist;': '\u2204',
+ 'nexists;': '\u2204',
+ 'Nfr;': '\U0001d511',
+ 'nfr;': '\U0001d52b',
+ 'ngE;': '\u2267\u0338',
+ 'nge;': '\u2271',
+ 'ngeq;': '\u2271',
+ 'ngeqq;': '\u2267\u0338',
+ 'ngeqslant;': '\u2a7e\u0338',
+ 'nges;': '\u2a7e\u0338',
+ 'nGg;': '\u22d9\u0338',
+ 'ngsim;': '\u2275',
+ 'nGt;': '\u226b\u20d2',
+ 'ngt;': '\u226f',
+ 'ngtr;': '\u226f',
+ 'nGtv;': '\u226b\u0338',
+ 'nhArr;': '\u21ce',
+ 'nharr;': '\u21ae',
+ 'nhpar;': '\u2af2',
+ 'ni;': '\u220b',
+ 'nis;': '\u22fc',
+ 'nisd;': '\u22fa',
+ 'niv;': '\u220b',
+ 'NJcy;': '\u040a',
+ 'njcy;': '\u045a',
+ 'nlArr;': '\u21cd',
+ 'nlarr;': '\u219a',
+ 'nldr;': '\u2025',
+ 'nlE;': '\u2266\u0338',
+ 'nle;': '\u2270',
+ 'nLeftarrow;': '\u21cd',
+ 'nleftarrow;': '\u219a',
+ 'nLeftrightarrow;': '\u21ce',
+ 'nleftrightarrow;': '\u21ae',
+ 'nleq;': '\u2270',
+ 'nleqq;': '\u2266\u0338',
+ 'nleqslant;': '\u2a7d\u0338',
+ 'nles;': '\u2a7d\u0338',
+ 'nless;': '\u226e',
+ 'nLl;': '\u22d8\u0338',
+ 'nlsim;': '\u2274',
+ 'nLt;': '\u226a\u20d2',
+ 'nlt;': '\u226e',
+ 'nltri;': '\u22ea',
+ 'nltrie;': '\u22ec',
+ 'nLtv;': '\u226a\u0338',
+ 'nmid;': '\u2224',
+ 'NoBreak;': '\u2060',
+ 'NonBreakingSpace;': '\xa0',
+ 'Nopf;': '\u2115',
+ 'nopf;': '\U0001d55f',
+ 'not': '\xac',
+ 'Not;': '\u2aec',
+ 'not;': '\xac',
+ 'NotCongruent;': '\u2262',
+ 'NotCupCap;': '\u226d',
+ 'NotDoubleVerticalBar;': '\u2226',
+ 'NotElement;': '\u2209',
+ 'NotEqual;': '\u2260',
+ 'NotEqualTilde;': '\u2242\u0338',
+ 'NotExists;': '\u2204',
+ 'NotGreater;': '\u226f',
+ 'NotGreaterEqual;': '\u2271',
+ 'NotGreaterFullEqual;': '\u2267\u0338',
+ 'NotGreaterGreater;': '\u226b\u0338',
+ 'NotGreaterLess;': '\u2279',
+ 'NotGreaterSlantEqual;': '\u2a7e\u0338',
+ 'NotGreaterTilde;': '\u2275',
+ 'NotHumpDownHump;': '\u224e\u0338',
+ 'NotHumpEqual;': '\u224f\u0338',
+ 'notin;': '\u2209',
+ 'notindot;': '\u22f5\u0338',
+ 'notinE;': '\u22f9\u0338',
+ 'notinva;': '\u2209',
+ 'notinvb;': '\u22f7',
+ 'notinvc;': '\u22f6',
+ 'NotLeftTriangle;': '\u22ea',
+ 'NotLeftTriangleBar;': '\u29cf\u0338',
+ 'NotLeftTriangleEqual;': '\u22ec',
+ 'NotLess;': '\u226e',
+ 'NotLessEqual;': '\u2270',
+ 'NotLessGreater;': '\u2278',
+ 'NotLessLess;': '\u226a\u0338',
+ 'NotLessSlantEqual;': '\u2a7d\u0338',
+ 'NotLessTilde;': '\u2274',
+ 'NotNestedGreaterGreater;': '\u2aa2\u0338',
+ 'NotNestedLessLess;': '\u2aa1\u0338',
+ 'notni;': '\u220c',
+ 'notniva;': '\u220c',
+ 'notnivb;': '\u22fe',
+ 'notnivc;': '\u22fd',
+ 'NotPrecedes;': '\u2280',
+ 'NotPrecedesEqual;': '\u2aaf\u0338',
+ 'NotPrecedesSlantEqual;': '\u22e0',
+ 'NotReverseElement;': '\u220c',
+ 'NotRightTriangle;': '\u22eb',
+ 'NotRightTriangleBar;': '\u29d0\u0338',
+ 'NotRightTriangleEqual;': '\u22ed',
+ 'NotSquareSubset;': '\u228f\u0338',
+ 'NotSquareSubsetEqual;': '\u22e2',
+ 'NotSquareSuperset;': '\u2290\u0338',
+ 'NotSquareSupersetEqual;': '\u22e3',
+ 'NotSubset;': '\u2282\u20d2',
+ 'NotSubsetEqual;': '\u2288',
+ 'NotSucceeds;': '\u2281',
+ 'NotSucceedsEqual;': '\u2ab0\u0338',
+ 'NotSucceedsSlantEqual;': '\u22e1',
+ 'NotSucceedsTilde;': '\u227f\u0338',
+ 'NotSuperset;': '\u2283\u20d2',
+ 'NotSupersetEqual;': '\u2289',
+ 'NotTilde;': '\u2241',
+ 'NotTildeEqual;': '\u2244',
+ 'NotTildeFullEqual;': '\u2247',
+ 'NotTildeTilde;': '\u2249',
+ 'NotVerticalBar;': '\u2224',
+ 'npar;': '\u2226',
+ 'nparallel;': '\u2226',
+ 'nparsl;': '\u2afd\u20e5',
+ 'npart;': '\u2202\u0338',
+ 'npolint;': '\u2a14',
+ 'npr;': '\u2280',
+ 'nprcue;': '\u22e0',
+ 'npre;': '\u2aaf\u0338',
+ 'nprec;': '\u2280',
+ 'npreceq;': '\u2aaf\u0338',
+ 'nrArr;': '\u21cf',
+ 'nrarr;': '\u219b',
+ 'nrarrc;': '\u2933\u0338',
+ 'nrarrw;': '\u219d\u0338',
+ 'nRightarrow;': '\u21cf',
+ 'nrightarrow;': '\u219b',
+ 'nrtri;': '\u22eb',
+ 'nrtrie;': '\u22ed',
+ 'nsc;': '\u2281',
+ 'nsccue;': '\u22e1',
+ 'nsce;': '\u2ab0\u0338',
+ 'Nscr;': '\U0001d4a9',
+ 'nscr;': '\U0001d4c3',
+ 'nshortmid;': '\u2224',
+ 'nshortparallel;': '\u2226',
+ 'nsim;': '\u2241',
+ 'nsime;': '\u2244',
+ 'nsimeq;': '\u2244',
+ 'nsmid;': '\u2224',
+ 'nspar;': '\u2226',
+ 'nsqsube;': '\u22e2',
+ 'nsqsupe;': '\u22e3',
+ 'nsub;': '\u2284',
+ 'nsubE;': '\u2ac5\u0338',
+ 'nsube;': '\u2288',
+ 'nsubset;': '\u2282\u20d2',
+ 'nsubseteq;': '\u2288',
+ 'nsubseteqq;': '\u2ac5\u0338',
+ 'nsucc;': '\u2281',
+ 'nsucceq;': '\u2ab0\u0338',
+ 'nsup;': '\u2285',
+ 'nsupE;': '\u2ac6\u0338',
+ 'nsupe;': '\u2289',
+ 'nsupset;': '\u2283\u20d2',
+ 'nsupseteq;': '\u2289',
+ 'nsupseteqq;': '\u2ac6\u0338',
+ 'ntgl;': '\u2279',
+ 'Ntilde': '\xd1',
+ 'ntilde': '\xf1',
+ 'Ntilde;': '\xd1',
+ 'ntilde;': '\xf1',
+ 'ntlg;': '\u2278',
+ 'ntriangleleft;': '\u22ea',
+ 'ntrianglelefteq;': '\u22ec',
+ 'ntriangleright;': '\u22eb',
+ 'ntrianglerighteq;': '\u22ed',
+ 'Nu;': '\u039d',
+ 'nu;': '\u03bd',
+ 'num;': '#',
+ 'numero;': '\u2116',
+ 'numsp;': '\u2007',
+ 'nvap;': '\u224d\u20d2',
+ 'nVDash;': '\u22af',
+ 'nVdash;': '\u22ae',
+ 'nvDash;': '\u22ad',
+ 'nvdash;': '\u22ac',
+ 'nvge;': '\u2265\u20d2',
+ 'nvgt;': '>\u20d2',
+ 'nvHarr;': '\u2904',
+ 'nvinfin;': '\u29de',
+ 'nvlArr;': '\u2902',
+ 'nvle;': '\u2264\u20d2',
+ 'nvlt;': '<\u20d2',
+ 'nvltrie;': '\u22b4\u20d2',
+ 'nvrArr;': '\u2903',
+ 'nvrtrie;': '\u22b5\u20d2',
+ 'nvsim;': '\u223c\u20d2',
+ 'nwarhk;': '\u2923',
+ 'nwArr;': '\u21d6',
+ 'nwarr;': '\u2196',
+ 'nwarrow;': '\u2196',
+ 'nwnear;': '\u2927',
+ 'Oacute': '\xd3',
+ 'oacute': '\xf3',
+ 'Oacute;': '\xd3',
+ 'oacute;': '\xf3',
+ 'oast;': '\u229b',
+ 'ocir;': '\u229a',
+ 'Ocirc': '\xd4',
+ 'ocirc': '\xf4',
+ 'Ocirc;': '\xd4',
+ 'ocirc;': '\xf4',
+ 'Ocy;': '\u041e',
+ 'ocy;': '\u043e',
+ 'odash;': '\u229d',
+ 'Odblac;': '\u0150',
+ 'odblac;': '\u0151',
+ 'odiv;': '\u2a38',
+ 'odot;': '\u2299',
+ 'odsold;': '\u29bc',
+ 'OElig;': '\u0152',
+ 'oelig;': '\u0153',
+ 'ofcir;': '\u29bf',
+ 'Ofr;': '\U0001d512',
+ 'ofr;': '\U0001d52c',
+ 'ogon;': '\u02db',
+ 'Ograve': '\xd2',
+ 'ograve': '\xf2',
+ 'Ograve;': '\xd2',
+ 'ograve;': '\xf2',
+ 'ogt;': '\u29c1',
+ 'ohbar;': '\u29b5',
+ 'ohm;': '\u03a9',
+ 'oint;': '\u222e',
+ 'olarr;': '\u21ba',
+ 'olcir;': '\u29be',
+ 'olcross;': '\u29bb',
+ 'oline;': '\u203e',
+ 'olt;': '\u29c0',
+ 'Omacr;': '\u014c',
+ 'omacr;': '\u014d',
+ 'Omega;': '\u03a9',
+ 'omega;': '\u03c9',
+ 'Omicron;': '\u039f',
+ 'omicron;': '\u03bf',
+ 'omid;': '\u29b6',
+ 'ominus;': '\u2296',
+ 'Oopf;': '\U0001d546',
+ 'oopf;': '\U0001d560',
+ 'opar;': '\u29b7',
+ 'OpenCurlyDoubleQuote;': '\u201c',
+ 'OpenCurlyQuote;': '\u2018',
+ 'operp;': '\u29b9',
+ 'oplus;': '\u2295',
+ 'Or;': '\u2a54',
+ 'or;': '\u2228',
+ 'orarr;': '\u21bb',
+ 'ord;': '\u2a5d',
+ 'order;': '\u2134',
+ 'orderof;': '\u2134',
+ 'ordf': '\xaa',
+ 'ordf;': '\xaa',
+ 'ordm': '\xba',
+ 'ordm;': '\xba',
+ 'origof;': '\u22b6',
+ 'oror;': '\u2a56',
+ 'orslope;': '\u2a57',
+ 'orv;': '\u2a5b',
+ 'oS;': '\u24c8',
+ 'Oscr;': '\U0001d4aa',
+ 'oscr;': '\u2134',
+ 'Oslash': '\xd8',
+ 'oslash': '\xf8',
+ 'Oslash;': '\xd8',
+ 'oslash;': '\xf8',
+ 'osol;': '\u2298',
+ 'Otilde': '\xd5',
+ 'otilde': '\xf5',
+ 'Otilde;': '\xd5',
+ 'otilde;': '\xf5',
+ 'Otimes;': '\u2a37',
+ 'otimes;': '\u2297',
+ 'otimesas;': '\u2a36',
+ 'Ouml': '\xd6',
+ 'ouml': '\xf6',
+ 'Ouml;': '\xd6',
+ 'ouml;': '\xf6',
+ 'ovbar;': '\u233d',
+ 'OverBar;': '\u203e',
+ 'OverBrace;': '\u23de',
+ 'OverBracket;': '\u23b4',
+ 'OverParenthesis;': '\u23dc',
+ 'par;': '\u2225',
+ 'para': '\xb6',
+ 'para;': '\xb6',
+ 'parallel;': '\u2225',
+ 'parsim;': '\u2af3',
+ 'parsl;': '\u2afd',
+ 'part;': '\u2202',
+ 'PartialD;': '\u2202',
+ 'Pcy;': '\u041f',
+ 'pcy;': '\u043f',
+ 'percnt;': '%',
+ 'period;': '.',
+ 'permil;': '\u2030',
+ 'perp;': '\u22a5',
+ 'pertenk;': '\u2031',
+ 'Pfr;': '\U0001d513',
+ 'pfr;': '\U0001d52d',
+ 'Phi;': '\u03a6',
+ 'phi;': '\u03c6',
+ 'phiv;': '\u03d5',
+ 'phmmat;': '\u2133',
+ 'phone;': '\u260e',
+ 'Pi;': '\u03a0',
+ 'pi;': '\u03c0',
+ 'pitchfork;': '\u22d4',
+ 'piv;': '\u03d6',
+ 'planck;': '\u210f',
+ 'planckh;': '\u210e',
+ 'plankv;': '\u210f',
+ 'plus;': '+',
+ 'plusacir;': '\u2a23',
+ 'plusb;': '\u229e',
+ 'pluscir;': '\u2a22',
+ 'plusdo;': '\u2214',
+ 'plusdu;': '\u2a25',
+ 'pluse;': '\u2a72',
+ 'PlusMinus;': '\xb1',
+ 'plusmn': '\xb1',
+ 'plusmn;': '\xb1',
+ 'plussim;': '\u2a26',
+ 'plustwo;': '\u2a27',
+ 'pm;': '\xb1',
+ 'Poincareplane;': '\u210c',
+ 'pointint;': '\u2a15',
+ 'Popf;': '\u2119',
+ 'popf;': '\U0001d561',
+ 'pound': '\xa3',
+ 'pound;': '\xa3',
+ 'Pr;': '\u2abb',
+ 'pr;': '\u227a',
+ 'prap;': '\u2ab7',
+ 'prcue;': '\u227c',
+ 'prE;': '\u2ab3',
+ 'pre;': '\u2aaf',
+ 'prec;': '\u227a',
+ 'precapprox;': '\u2ab7',
+ 'preccurlyeq;': '\u227c',
+ 'Precedes;': '\u227a',
+ 'PrecedesEqual;': '\u2aaf',
+ 'PrecedesSlantEqual;': '\u227c',
+ 'PrecedesTilde;': '\u227e',
+ 'preceq;': '\u2aaf',
+ 'precnapprox;': '\u2ab9',
+ 'precneqq;': '\u2ab5',
+ 'precnsim;': '\u22e8',
+ 'precsim;': '\u227e',
+ 'Prime;': '\u2033',
+ 'prime;': '\u2032',
+ 'primes;': '\u2119',
+ 'prnap;': '\u2ab9',
+ 'prnE;': '\u2ab5',
+ 'prnsim;': '\u22e8',
+ 'prod;': '\u220f',
+ 'Product;': '\u220f',
+ 'profalar;': '\u232e',
+ 'profline;': '\u2312',
+ 'profsurf;': '\u2313',
+ 'prop;': '\u221d',
+ 'Proportion;': '\u2237',
+ 'Proportional;': '\u221d',
+ 'propto;': '\u221d',
+ 'prsim;': '\u227e',
+ 'prurel;': '\u22b0',
+ 'Pscr;': '\U0001d4ab',
+ 'pscr;': '\U0001d4c5',
+ 'Psi;': '\u03a8',
+ 'psi;': '\u03c8',
+ 'puncsp;': '\u2008',
+ 'Qfr;': '\U0001d514',
+ 'qfr;': '\U0001d52e',
+ 'qint;': '\u2a0c',
+ 'Qopf;': '\u211a',
+ 'qopf;': '\U0001d562',
+ 'qprime;': '\u2057',
+ 'Qscr;': '\U0001d4ac',
+ 'qscr;': '\U0001d4c6',
+ 'quaternions;': '\u210d',
+ 'quatint;': '\u2a16',
+ 'quest;': '?',
+ 'questeq;': '\u225f',
+ 'QUOT': '"',
+ 'quot': '"',
+ 'QUOT;': '"',
+ 'quot;': '"',
+ 'rAarr;': '\u21db',
+ 'race;': '\u223d\u0331',
+ 'Racute;': '\u0154',
+ 'racute;': '\u0155',
+ 'radic;': '\u221a',
+ 'raemptyv;': '\u29b3',
+ 'Rang;': '\u27eb',
+ 'rang;': '\u27e9',
+ 'rangd;': '\u2992',
+ 'range;': '\u29a5',
+ 'rangle;': '\u27e9',
+ 'raquo': '\xbb',
+ 'raquo;': '\xbb',
+ 'Rarr;': '\u21a0',
+ 'rArr;': '\u21d2',
+ 'rarr;': '\u2192',
+ 'rarrap;': '\u2975',
+ 'rarrb;': '\u21e5',
+ 'rarrbfs;': '\u2920',
+ 'rarrc;': '\u2933',
+ 'rarrfs;': '\u291e',
+ 'rarrhk;': '\u21aa',
+ 'rarrlp;': '\u21ac',
+ 'rarrpl;': '\u2945',
+ 'rarrsim;': '\u2974',
+ 'Rarrtl;': '\u2916',
+ 'rarrtl;': '\u21a3',
+ 'rarrw;': '\u219d',
+ 'rAtail;': '\u291c',
+ 'ratail;': '\u291a',
+ 'ratio;': '\u2236',
+ 'rationals;': '\u211a',
+ 'RBarr;': '\u2910',
+ 'rBarr;': '\u290f',
+ 'rbarr;': '\u290d',
+ 'rbbrk;': '\u2773',
+ 'rbrace;': '}',
+ 'rbrack;': ']',
+ 'rbrke;': '\u298c',
+ 'rbrksld;': '\u298e',
+ 'rbrkslu;': '\u2990',
+ 'Rcaron;': '\u0158',
+ 'rcaron;': '\u0159',
+ 'Rcedil;': '\u0156',
+ 'rcedil;': '\u0157',
+ 'rceil;': '\u2309',
+ 'rcub;': '}',
+ 'Rcy;': '\u0420',
+ 'rcy;': '\u0440',
+ 'rdca;': '\u2937',
+ 'rdldhar;': '\u2969',
+ 'rdquo;': '\u201d',
+ 'rdquor;': '\u201d',
+ 'rdsh;': '\u21b3',
+ 'Re;': '\u211c',
+ 'real;': '\u211c',
+ 'realine;': '\u211b',
+ 'realpart;': '\u211c',
+ 'reals;': '\u211d',
+ 'rect;': '\u25ad',
+ 'REG': '\xae',
+ 'reg': '\xae',
+ 'REG;': '\xae',
+ 'reg;': '\xae',
+ 'ReverseElement;': '\u220b',
+ 'ReverseEquilibrium;': '\u21cb',
+ 'ReverseUpEquilibrium;': '\u296f',
+ 'rfisht;': '\u297d',
+ 'rfloor;': '\u230b',
+ 'Rfr;': '\u211c',
+ 'rfr;': '\U0001d52f',
+ 'rHar;': '\u2964',
+ 'rhard;': '\u21c1',
+ 'rharu;': '\u21c0',
+ 'rharul;': '\u296c',
+ 'Rho;': '\u03a1',
+ 'rho;': '\u03c1',
+ 'rhov;': '\u03f1',
+ 'RightAngleBracket;': '\u27e9',
+ 'RightArrow;': '\u2192',
+ 'Rightarrow;': '\u21d2',
+ 'rightarrow;': '\u2192',
+ 'RightArrowBar;': '\u21e5',
+ 'RightArrowLeftArrow;': '\u21c4',
+ 'rightarrowtail;': '\u21a3',
+ 'RightCeiling;': '\u2309',
+ 'RightDoubleBracket;': '\u27e7',
+ 'RightDownTeeVector;': '\u295d',
+ 'RightDownVector;': '\u21c2',
+ 'RightDownVectorBar;': '\u2955',
+ 'RightFloor;': '\u230b',
+ 'rightharpoondown;': '\u21c1',
+ 'rightharpoonup;': '\u21c0',
+ 'rightleftarrows;': '\u21c4',
+ 'rightleftharpoons;': '\u21cc',
+ 'rightrightarrows;': '\u21c9',
+ 'rightsquigarrow;': '\u219d',
+ 'RightTee;': '\u22a2',
+ 'RightTeeArrow;': '\u21a6',
+ 'RightTeeVector;': '\u295b',
+ 'rightthreetimes;': '\u22cc',
+ 'RightTriangle;': '\u22b3',
+ 'RightTriangleBar;': '\u29d0',
+ 'RightTriangleEqual;': '\u22b5',
+ 'RightUpDownVector;': '\u294f',
+ 'RightUpTeeVector;': '\u295c',
+ 'RightUpVector;': '\u21be',
+ 'RightUpVectorBar;': '\u2954',
+ 'RightVector;': '\u21c0',
+ 'RightVectorBar;': '\u2953',
+ 'ring;': '\u02da',
+ 'risingdotseq;': '\u2253',
+ 'rlarr;': '\u21c4',
+ 'rlhar;': '\u21cc',
+ 'rlm;': '\u200f',
+ 'rmoust;': '\u23b1',
+ 'rmoustache;': '\u23b1',
+ 'rnmid;': '\u2aee',
+ 'roang;': '\u27ed',
+ 'roarr;': '\u21fe',
+ 'robrk;': '\u27e7',
+ 'ropar;': '\u2986',
+ 'Ropf;': '\u211d',
+ 'ropf;': '\U0001d563',
+ 'roplus;': '\u2a2e',
+ 'rotimes;': '\u2a35',
+ 'RoundImplies;': '\u2970',
+ 'rpar;': ')',
+ 'rpargt;': '\u2994',
+ 'rppolint;': '\u2a12',
+ 'rrarr;': '\u21c9',
+ 'Rrightarrow;': '\u21db',
+ 'rsaquo;': '\u203a',
+ 'Rscr;': '\u211b',
+ 'rscr;': '\U0001d4c7',
+ 'Rsh;': '\u21b1',
+ 'rsh;': '\u21b1',
+ 'rsqb;': ']',
+ 'rsquo;': '\u2019',
+ 'rsquor;': '\u2019',
+ 'rthree;': '\u22cc',
+ 'rtimes;': '\u22ca',
+ 'rtri;': '\u25b9',
+ 'rtrie;': '\u22b5',
+ 'rtrif;': '\u25b8',
+ 'rtriltri;': '\u29ce',
+ 'RuleDelayed;': '\u29f4',
+ 'ruluhar;': '\u2968',
+ 'rx;': '\u211e',
+ 'Sacute;': '\u015a',
+ 'sacute;': '\u015b',
+ 'sbquo;': '\u201a',
+ 'Sc;': '\u2abc',
+ 'sc;': '\u227b',
+ 'scap;': '\u2ab8',
+ 'Scaron;': '\u0160',
+ 'scaron;': '\u0161',
+ 'sccue;': '\u227d',
+ 'scE;': '\u2ab4',
+ 'sce;': '\u2ab0',
+ 'Scedil;': '\u015e',
+ 'scedil;': '\u015f',
+ 'Scirc;': '\u015c',
+ 'scirc;': '\u015d',
+ 'scnap;': '\u2aba',
+ 'scnE;': '\u2ab6',
+ 'scnsim;': '\u22e9',
+ 'scpolint;': '\u2a13',
+ 'scsim;': '\u227f',
+ 'Scy;': '\u0421',
+ 'scy;': '\u0441',
+ 'sdot;': '\u22c5',
+ 'sdotb;': '\u22a1',
+ 'sdote;': '\u2a66',
+ 'searhk;': '\u2925',
+ 'seArr;': '\u21d8',
+ 'searr;': '\u2198',
+ 'searrow;': '\u2198',
+ 'sect': '\xa7',
+ 'sect;': '\xa7',
+ 'semi;': ';',
+ 'seswar;': '\u2929',
+ 'setminus;': '\u2216',
+ 'setmn;': '\u2216',
+ 'sext;': '\u2736',
+ 'Sfr;': '\U0001d516',
+ 'sfr;': '\U0001d530',
+ 'sfrown;': '\u2322',
+ 'sharp;': '\u266f',
+ 'SHCHcy;': '\u0429',
+ 'shchcy;': '\u0449',
+ 'SHcy;': '\u0428',
+ 'shcy;': '\u0448',
+ 'ShortDownArrow;': '\u2193',
+ 'ShortLeftArrow;': '\u2190',
+ 'shortmid;': '\u2223',
+ 'shortparallel;': '\u2225',
+ 'ShortRightArrow;': '\u2192',
+ 'ShortUpArrow;': '\u2191',
+ 'shy': '\xad',
+ 'shy;': '\xad',
+ 'Sigma;': '\u03a3',
+ 'sigma;': '\u03c3',
+ 'sigmaf;': '\u03c2',
+ 'sigmav;': '\u03c2',
+ 'sim;': '\u223c',
+ 'simdot;': '\u2a6a',
+ 'sime;': '\u2243',
+ 'simeq;': '\u2243',
+ 'simg;': '\u2a9e',
+ 'simgE;': '\u2aa0',
+ 'siml;': '\u2a9d',
+ 'simlE;': '\u2a9f',
+ 'simne;': '\u2246',
+ 'simplus;': '\u2a24',
+ 'simrarr;': '\u2972',
+ 'slarr;': '\u2190',
+ 'SmallCircle;': '\u2218',
+ 'smallsetminus;': '\u2216',
+ 'smashp;': '\u2a33',
+ 'smeparsl;': '\u29e4',
+ 'smid;': '\u2223',
+ 'smile;': '\u2323',
+ 'smt;': '\u2aaa',
+ 'smte;': '\u2aac',
+ 'smtes;': '\u2aac\ufe00',
+ 'SOFTcy;': '\u042c',
+ 'softcy;': '\u044c',
+ 'sol;': '/',
+ 'solb;': '\u29c4',
+ 'solbar;': '\u233f',
+ 'Sopf;': '\U0001d54a',
+ 'sopf;': '\U0001d564',
+ 'spades;': '\u2660',
+ 'spadesuit;': '\u2660',
+ 'spar;': '\u2225',
+ 'sqcap;': '\u2293',
+ 'sqcaps;': '\u2293\ufe00',
+ 'sqcup;': '\u2294',
+ 'sqcups;': '\u2294\ufe00',
+ 'Sqrt;': '\u221a',
+ 'sqsub;': '\u228f',
+ 'sqsube;': '\u2291',
+ 'sqsubset;': '\u228f',
+ 'sqsubseteq;': '\u2291',
+ 'sqsup;': '\u2290',
+ 'sqsupe;': '\u2292',
+ 'sqsupset;': '\u2290',
+ 'sqsupseteq;': '\u2292',
+ 'squ;': '\u25a1',
+ 'Square;': '\u25a1',
+ 'square;': '\u25a1',
+ 'SquareIntersection;': '\u2293',
+ 'SquareSubset;': '\u228f',
+ 'SquareSubsetEqual;': '\u2291',
+ 'SquareSuperset;': '\u2290',
+ 'SquareSupersetEqual;': '\u2292',
+ 'SquareUnion;': '\u2294',
+ 'squarf;': '\u25aa',
+ 'squf;': '\u25aa',
+ 'srarr;': '\u2192',
+ 'Sscr;': '\U0001d4ae',
+ 'sscr;': '\U0001d4c8',
+ 'ssetmn;': '\u2216',
+ 'ssmile;': '\u2323',
+ 'sstarf;': '\u22c6',
+ 'Star;': '\u22c6',
+ 'star;': '\u2606',
+ 'starf;': '\u2605',
+ 'straightepsilon;': '\u03f5',
+ 'straightphi;': '\u03d5',
+ 'strns;': '\xaf',
+ 'Sub;': '\u22d0',
+ 'sub;': '\u2282',
+ 'subdot;': '\u2abd',
+ 'subE;': '\u2ac5',
+ 'sube;': '\u2286',
+ 'subedot;': '\u2ac3',
+ 'submult;': '\u2ac1',
+ 'subnE;': '\u2acb',
+ 'subne;': '\u228a',
+ 'subplus;': '\u2abf',
+ 'subrarr;': '\u2979',
+ 'Subset;': '\u22d0',
+ 'subset;': '\u2282',
+ 'subseteq;': '\u2286',
+ 'subseteqq;': '\u2ac5',
+ 'SubsetEqual;': '\u2286',
+ 'subsetneq;': '\u228a',
+ 'subsetneqq;': '\u2acb',
+ 'subsim;': '\u2ac7',
+ 'subsub;': '\u2ad5',
+ 'subsup;': '\u2ad3',
+ 'succ;': '\u227b',
+ 'succapprox;': '\u2ab8',
+ 'succcurlyeq;': '\u227d',
+ 'Succeeds;': '\u227b',
+ 'SucceedsEqual;': '\u2ab0',
+ 'SucceedsSlantEqual;': '\u227d',
+ 'SucceedsTilde;': '\u227f',
+ 'succeq;': '\u2ab0',
+ 'succnapprox;': '\u2aba',
+ 'succneqq;': '\u2ab6',
+ 'succnsim;': '\u22e9',
+ 'succsim;': '\u227f',
+ 'SuchThat;': '\u220b',
+ 'Sum;': '\u2211',
+ 'sum;': '\u2211',
+ 'sung;': '\u266a',
+ 'sup1': '\xb9',
+ 'sup1;': '\xb9',
+ 'sup2': '\xb2',
+ 'sup2;': '\xb2',
+ 'sup3': '\xb3',
+ 'sup3;': '\xb3',
+ 'Sup;': '\u22d1',
+ 'sup;': '\u2283',
+ 'supdot;': '\u2abe',
+ 'supdsub;': '\u2ad8',
+ 'supE;': '\u2ac6',
+ 'supe;': '\u2287',
+ 'supedot;': '\u2ac4',
+ 'Superset;': '\u2283',
+ 'SupersetEqual;': '\u2287',
+ 'suphsol;': '\u27c9',
+ 'suphsub;': '\u2ad7',
+ 'suplarr;': '\u297b',
+ 'supmult;': '\u2ac2',
+ 'supnE;': '\u2acc',
+ 'supne;': '\u228b',
+ 'supplus;': '\u2ac0',
+ 'Supset;': '\u22d1',
+ 'supset;': '\u2283',
+ 'supseteq;': '\u2287',
+ 'supseteqq;': '\u2ac6',
+ 'supsetneq;': '\u228b',
+ 'supsetneqq;': '\u2acc',
+ 'supsim;': '\u2ac8',
+ 'supsub;': '\u2ad4',
+ 'supsup;': '\u2ad6',
+ 'swarhk;': '\u2926',
+ 'swArr;': '\u21d9',
+ 'swarr;': '\u2199',
+ 'swarrow;': '\u2199',
+ 'swnwar;': '\u292a',
+ 'szlig': '\xdf',
+ 'szlig;': '\xdf',
+ 'Tab;': '\t',
+ 'target;': '\u2316',
+ 'Tau;': '\u03a4',
+ 'tau;': '\u03c4',
+ 'tbrk;': '\u23b4',
+ 'Tcaron;': '\u0164',
+ 'tcaron;': '\u0165',
+ 'Tcedil;': '\u0162',
+ 'tcedil;': '\u0163',
+ 'Tcy;': '\u0422',
+ 'tcy;': '\u0442',
+ 'tdot;': '\u20db',
+ 'telrec;': '\u2315',
+ 'Tfr;': '\U0001d517',
+ 'tfr;': '\U0001d531',
+ 'there4;': '\u2234',
+ 'Therefore;': '\u2234',
+ 'therefore;': '\u2234',
+ 'Theta;': '\u0398',
+ 'theta;': '\u03b8',
+ 'thetasym;': '\u03d1',
+ 'thetav;': '\u03d1',
+ 'thickapprox;': '\u2248',
+ 'thicksim;': '\u223c',
+ 'ThickSpace;': '\u205f\u200a',
+ 'thinsp;': '\u2009',
+ 'ThinSpace;': '\u2009',
+ 'thkap;': '\u2248',
+ 'thksim;': '\u223c',
+ 'THORN': '\xde',
+ 'thorn': '\xfe',
+ 'THORN;': '\xde',
+ 'thorn;': '\xfe',
+ 'Tilde;': '\u223c',
+ 'tilde;': '\u02dc',
+ 'TildeEqual;': '\u2243',
+ 'TildeFullEqual;': '\u2245',
+ 'TildeTilde;': '\u2248',
+ 'times': '\xd7',
+ 'times;': '\xd7',
+ 'timesb;': '\u22a0',
+ 'timesbar;': '\u2a31',
+ 'timesd;': '\u2a30',
+ 'tint;': '\u222d',
+ 'toea;': '\u2928',
+ 'top;': '\u22a4',
+ 'topbot;': '\u2336',
+ 'topcir;': '\u2af1',
+ 'Topf;': '\U0001d54b',
+ 'topf;': '\U0001d565',
+ 'topfork;': '\u2ada',
+ 'tosa;': '\u2929',
+ 'tprime;': '\u2034',
+ 'TRADE;': '\u2122',
+ 'trade;': '\u2122',
+ 'triangle;': '\u25b5',
+ 'triangledown;': '\u25bf',
+ 'triangleleft;': '\u25c3',
+ 'trianglelefteq;': '\u22b4',
+ 'triangleq;': '\u225c',
+ 'triangleright;': '\u25b9',
+ 'trianglerighteq;': '\u22b5',
+ 'tridot;': '\u25ec',
+ 'trie;': '\u225c',
+ 'triminus;': '\u2a3a',
+ 'TripleDot;': '\u20db',
+ 'triplus;': '\u2a39',
+ 'trisb;': '\u29cd',
+ 'tritime;': '\u2a3b',
+ 'trpezium;': '\u23e2',
+ 'Tscr;': '\U0001d4af',
+ 'tscr;': '\U0001d4c9',
+ 'TScy;': '\u0426',
+ 'tscy;': '\u0446',
+ 'TSHcy;': '\u040b',
+ 'tshcy;': '\u045b',
+ 'Tstrok;': '\u0166',
+ 'tstrok;': '\u0167',
+ 'twixt;': '\u226c',
+ 'twoheadleftarrow;': '\u219e',
+ 'twoheadrightarrow;': '\u21a0',
+ 'Uacute': '\xda',
+ 'uacute': '\xfa',
+ 'Uacute;': '\xda',
+ 'uacute;': '\xfa',
+ 'Uarr;': '\u219f',
+ 'uArr;': '\u21d1',
+ 'uarr;': '\u2191',
+ 'Uarrocir;': '\u2949',
+ 'Ubrcy;': '\u040e',
+ 'ubrcy;': '\u045e',
+ 'Ubreve;': '\u016c',
+ 'ubreve;': '\u016d',
+ 'Ucirc': '\xdb',
+ 'ucirc': '\xfb',
+ 'Ucirc;': '\xdb',
+ 'ucirc;': '\xfb',
+ 'Ucy;': '\u0423',
+ 'ucy;': '\u0443',
+ 'udarr;': '\u21c5',
+ 'Udblac;': '\u0170',
+ 'udblac;': '\u0171',
+ 'udhar;': '\u296e',
+ 'ufisht;': '\u297e',
+ 'Ufr;': '\U0001d518',
+ 'ufr;': '\U0001d532',
+ 'Ugrave': '\xd9',
+ 'ugrave': '\xf9',
+ 'Ugrave;': '\xd9',
+ 'ugrave;': '\xf9',
+ 'uHar;': '\u2963',
+ 'uharl;': '\u21bf',
+ 'uharr;': '\u21be',
+ 'uhblk;': '\u2580',
+ 'ulcorn;': '\u231c',
+ 'ulcorner;': '\u231c',
+ 'ulcrop;': '\u230f',
+ 'ultri;': '\u25f8',
+ 'Umacr;': '\u016a',
+ 'umacr;': '\u016b',
+ 'uml': '\xa8',
+ 'uml;': '\xa8',
+ 'UnderBar;': '_',
+ 'UnderBrace;': '\u23df',
+ 'UnderBracket;': '\u23b5',
+ 'UnderParenthesis;': '\u23dd',
+ 'Union;': '\u22c3',
+ 'UnionPlus;': '\u228e',
+ 'Uogon;': '\u0172',
+ 'uogon;': '\u0173',
+ 'Uopf;': '\U0001d54c',
+ 'uopf;': '\U0001d566',
+ 'UpArrow;': '\u2191',
+ 'Uparrow;': '\u21d1',
+ 'uparrow;': '\u2191',
+ 'UpArrowBar;': '\u2912',
+ 'UpArrowDownArrow;': '\u21c5',
+ 'UpDownArrow;': '\u2195',
+ 'Updownarrow;': '\u21d5',
+ 'updownarrow;': '\u2195',
+ 'UpEquilibrium;': '\u296e',
+ 'upharpoonleft;': '\u21bf',
+ 'upharpoonright;': '\u21be',
+ 'uplus;': '\u228e',
+ 'UpperLeftArrow;': '\u2196',
+ 'UpperRightArrow;': '\u2197',
+ 'Upsi;': '\u03d2',
+ 'upsi;': '\u03c5',
+ 'upsih;': '\u03d2',
+ 'Upsilon;': '\u03a5',
+ 'upsilon;': '\u03c5',
+ 'UpTee;': '\u22a5',
+ 'UpTeeArrow;': '\u21a5',
+ 'upuparrows;': '\u21c8',
+ 'urcorn;': '\u231d',
+ 'urcorner;': '\u231d',
+ 'urcrop;': '\u230e',
+ 'Uring;': '\u016e',
+ 'uring;': '\u016f',
+ 'urtri;': '\u25f9',
+ 'Uscr;': '\U0001d4b0',
+ 'uscr;': '\U0001d4ca',
+ 'utdot;': '\u22f0',
+ 'Utilde;': '\u0168',
+ 'utilde;': '\u0169',
+ 'utri;': '\u25b5',
+ 'utrif;': '\u25b4',
+ 'uuarr;': '\u21c8',
+ 'Uuml': '\xdc',
+ 'uuml': '\xfc',
+ 'Uuml;': '\xdc',
+ 'uuml;': '\xfc',
+ 'uwangle;': '\u29a7',
+ 'vangrt;': '\u299c',
+ 'varepsilon;': '\u03f5',
+ 'varkappa;': '\u03f0',
+ 'varnothing;': '\u2205',
+ 'varphi;': '\u03d5',
+ 'varpi;': '\u03d6',
+ 'varpropto;': '\u221d',
+ 'vArr;': '\u21d5',
+ 'varr;': '\u2195',
+ 'varrho;': '\u03f1',
+ 'varsigma;': '\u03c2',
+ 'varsubsetneq;': '\u228a\ufe00',
+ 'varsubsetneqq;': '\u2acb\ufe00',
+ 'varsupsetneq;': '\u228b\ufe00',
+ 'varsupsetneqq;': '\u2acc\ufe00',
+ 'vartheta;': '\u03d1',
+ 'vartriangleleft;': '\u22b2',
+ 'vartriangleright;': '\u22b3',
+ 'Vbar;': '\u2aeb',
+ 'vBar;': '\u2ae8',
+ 'vBarv;': '\u2ae9',
+ 'Vcy;': '\u0412',
+ 'vcy;': '\u0432',
+ 'VDash;': '\u22ab',
+ 'Vdash;': '\u22a9',
+ 'vDash;': '\u22a8',
+ 'vdash;': '\u22a2',
+ 'Vdashl;': '\u2ae6',
+ 'Vee;': '\u22c1',
+ 'vee;': '\u2228',
+ 'veebar;': '\u22bb',
+ 'veeeq;': '\u225a',
+ 'vellip;': '\u22ee',
+ 'Verbar;': '\u2016',
+ 'verbar;': '|',
+ 'Vert;': '\u2016',
+ 'vert;': '|',
+ 'VerticalBar;': '\u2223',
+ 'VerticalLine;': '|',
+ 'VerticalSeparator;': '\u2758',
+ 'VerticalTilde;': '\u2240',
+ 'VeryThinSpace;': '\u200a',
+ 'Vfr;': '\U0001d519',
+ 'vfr;': '\U0001d533',
+ 'vltri;': '\u22b2',
+ 'vnsub;': '\u2282\u20d2',
+ 'vnsup;': '\u2283\u20d2',
+ 'Vopf;': '\U0001d54d',
+ 'vopf;': '\U0001d567',
+ 'vprop;': '\u221d',
+ 'vrtri;': '\u22b3',
+ 'Vscr;': '\U0001d4b1',
+ 'vscr;': '\U0001d4cb',
+ 'vsubnE;': '\u2acb\ufe00',
+ 'vsubne;': '\u228a\ufe00',
+ 'vsupnE;': '\u2acc\ufe00',
+ 'vsupne;': '\u228b\ufe00',
+ 'Vvdash;': '\u22aa',
+ 'vzigzag;': '\u299a',
+ 'Wcirc;': '\u0174',
+ 'wcirc;': '\u0175',
+ 'wedbar;': '\u2a5f',
+ 'Wedge;': '\u22c0',
+ 'wedge;': '\u2227',
+ 'wedgeq;': '\u2259',
+ 'weierp;': '\u2118',
+ 'Wfr;': '\U0001d51a',
+ 'wfr;': '\U0001d534',
+ 'Wopf;': '\U0001d54e',
+ 'wopf;': '\U0001d568',
+ 'wp;': '\u2118',
+ 'wr;': '\u2240',
+ 'wreath;': '\u2240',
+ 'Wscr;': '\U0001d4b2',
+ 'wscr;': '\U0001d4cc',
+ 'xcap;': '\u22c2',
+ 'xcirc;': '\u25ef',
+ 'xcup;': '\u22c3',
+ 'xdtri;': '\u25bd',
+ 'Xfr;': '\U0001d51b',
+ 'xfr;': '\U0001d535',
+ 'xhArr;': '\u27fa',
+ 'xharr;': '\u27f7',
+ 'Xi;': '\u039e',
+ 'xi;': '\u03be',
+ 'xlArr;': '\u27f8',
+ 'xlarr;': '\u27f5',
+ 'xmap;': '\u27fc',
+ 'xnis;': '\u22fb',
+ 'xodot;': '\u2a00',
+ 'Xopf;': '\U0001d54f',
+ 'xopf;': '\U0001d569',
+ 'xoplus;': '\u2a01',
+ 'xotime;': '\u2a02',
+ 'xrArr;': '\u27f9',
+ 'xrarr;': '\u27f6',
+ 'Xscr;': '\U0001d4b3',
+ 'xscr;': '\U0001d4cd',
+ 'xsqcup;': '\u2a06',
+ 'xuplus;': '\u2a04',
+ 'xutri;': '\u25b3',
+ 'xvee;': '\u22c1',
+ 'xwedge;': '\u22c0',
+ 'Yacute': '\xdd',
+ 'yacute': '\xfd',
+ 'Yacute;': '\xdd',
+ 'yacute;': '\xfd',
+ 'YAcy;': '\u042f',
+ 'yacy;': '\u044f',
+ 'Ycirc;': '\u0176',
+ 'ycirc;': '\u0177',
+ 'Ycy;': '\u042b',
+ 'ycy;': '\u044b',
+ 'yen': '\xa5',
+ 'yen;': '\xa5',
+ 'Yfr;': '\U0001d51c',
+ 'yfr;': '\U0001d536',
+ 'YIcy;': '\u0407',
+ 'yicy;': '\u0457',
+ 'Yopf;': '\U0001d550',
+ 'yopf;': '\U0001d56a',
+ 'Yscr;': '\U0001d4b4',
+ 'yscr;': '\U0001d4ce',
+ 'YUcy;': '\u042e',
+ 'yucy;': '\u044e',
+ 'yuml': '\xff',
+ 'Yuml;': '\u0178',
+ 'yuml;': '\xff',
+ 'Zacute;': '\u0179',
+ 'zacute;': '\u017a',
+ 'Zcaron;': '\u017d',
+ 'zcaron;': '\u017e',
+ 'Zcy;': '\u0417',
+ 'zcy;': '\u0437',
+ 'Zdot;': '\u017b',
+ 'zdot;': '\u017c',
+ 'zeetrf;': '\u2128',
+ 'ZeroWidthSpace;': '\u200b',
+ 'Zeta;': '\u0396',
+ 'zeta;': '\u03b6',
+ 'Zfr;': '\u2128',
+ 'zfr;': '\U0001d537',
+ 'ZHcy;': '\u0416',
+ 'zhcy;': '\u0436',
+ 'zigrarr;': '\u21dd',
+ 'Zopf;': '\u2124',
+ 'zopf;': '\U0001d56b',
+ 'Zscr;': '\U0001d4b5',
+ 'zscr;': '\U0001d4cf',
+ 'zwj;': '\u200d',
+ 'zwnj;': '\u200c',
+ }
+
+try:
+ import http.client as compat_http_client
+except ImportError: # Python 2
+ import httplib as compat_http_client
+
+try:
+ from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+ from urllib2 import HTTPError as compat_HTTPError
+
+try:
+ from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError: # Python 2
+ from urllib import urlretrieve as compat_urlretrieve
+
+try:
+ from html.parser import HTMLParser as compat_HTMLParser
+except ImportError: # Python 2
+ from HTMLParser import HTMLParser as compat_HTMLParser
+
+try: # Python 2
+ from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError: # Python <3.4
+ try:
+ from html.parser import HTMLParseError as compat_HTMLParseError
+ except ImportError: # Python >3.4
+
+ # HTMLParseError has been deprecated in Python 3.3 and removed in
+ # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
+ # and uniform cross-version exceptiong handling
+ class compat_HTMLParseError(Exception):
+ pass
+
+try:
+ from subprocess import DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+ compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
+try:
+ import http.server as compat_http_server
+except ImportError:
+ import BaseHTTPServer as compat_http_server
+
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
+try:
+ from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
+ from urllib.parse import unquote as compat_urllib_parse_unquote
+ from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+except ImportError: # Python 2
+ _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
+ else re.compile(r'([\x00-\x7f]+)'))
+
+ # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
+ # implementations from cpython 3.4.3's stdlib. Python 2's version
+ # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+
+ def compat_urllib_parse_unquote_to_bytes(string):
+ """unquote_to_bytes('abc%20def') -> b'abc def'."""
+ # Note: strings are encoded as UTF-8. This is only an issue if it contains
+ # unescaped non-ASCII characters, which URIs should not.
+ if not string:
+ # Is it a string-like object?
+ string.split
+ return b''
+ if isinstance(string, compat_str):
+ string = string.encode('utf-8')
+ bits = string.split(b'%')
+ if len(bits) == 1:
+ return string
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
+ try:
+ append(compat_urllib_parse._hextochr[item[:2]])
+ append(item[2:])
+ except KeyError:
+ append(b'%')
+ append(item)
+ return b''.join(res)
+
+ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+ """Replace %xx escapes by their single-character equivalent. The optional
+ encoding and errors parameters specify how to decode percent-encoded
+ sequences into Unicode characters, as accepted by the bytes.decode()
+ method.
+ By default, percent-encoded sequences are decoded with UTF-8, and invalid
+ sequences are replaced by a placeholder character.
+
+ unquote('abc%20def') -> 'abc def'.
+ """
+ if '%' not in string:
+ string.split
+ return string
+ if encoding is None:
+ encoding = 'utf-8'
+ if errors is None:
+ errors = 'replace'
+ bits = _asciire.split(string)
+ res = [bits[0]]
+ append = res.append
+ for i in range(1, len(bits), 2):
+ append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
+ append(bits[i + 1])
+ return ''.join(res)
+
+ def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
+ """Like unquote(), but also replace plus signs by spaces, as required for
+ unquoting HTML form values.
+
+ unquote_plus('%7e/abc+def') -> '~/abc def'
+ """
+ string = string.replace('+', ' ')
+ return compat_urllib_parse_unquote(string, encoding, errors)
+
+try:
+ from urllib.parse import urlencode as compat_urllib_parse_urlencode
+except ImportError: # Python 2
+ # Python 2 will choke in urlencode on mixture of byte and unicode strings.
+ # Possible solutions are to either port it from python 3 with all
+ # the friends or manually ensure input query contains only byte strings.
+ # We will stick with latter thus recursively encoding the whole query.
+ def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+ def encode_elem(e):
+ if isinstance(e, dict):
+ e = encode_dict(e)
+ elif isinstance(e, (list, tuple,)):
+ list_e = encode_list(e)
+ e = tuple(list_e) if isinstance(e, tuple) else list_e
+ elif isinstance(e, compat_str):
+ e = e.encode(encoding)
+ return e
+
+ def encode_dict(d):
+ return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+ def encode_list(l):
+ return [encode_elem(e) for e in l]
+
+ return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+
+try:
+ from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError: # Python < 3.4
+ # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
+ class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.get_full_url()
+
+ scheme, data = url.split(':', 1)
+ mediatype, data = data.split(',', 1)
+
+ # even base64 encoded data URLs might be quoted so unquote in any case:
+ data = compat_urllib_parse_unquote_to_bytes(data)
+ if mediatype.endswith(';base64'):
+ data = binascii.a2b_base64(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = 'text/plain;charset=US-ASCII'
+
+ headers = email.message_from_string(
+ 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
+
+ return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
+
+try:
+ compat_basestring = basestring # Python 2
+except NameError:
+ compat_basestring = str
+
+try:
+ compat_chr = unichr # Python 2
+except NameError:
+ compat_chr = chr
+
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
+
+if sys.version_info[0] >= 3:
+ def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+else:
+ # python 2.x tries to encode unicode strings with ascii (see the
+ # XMLParser._fixtext method)
+ try:
+ _etree_iter = etree.Element.iter
+ except AttributeError: # Python <=2.6
+ def _etree_iter(root):
+ for el in root.findall('*'):
+ yield el
+ for sub in _etree_iter(el):
+ yield sub
+
+ # on 2.6 XML doesn't have a parser argument, function copied from CPython
+ # 2.7 source
+ def _XML(text, parser=None):
+ if not parser:
+ parser = etree.XMLParser(target=_TreeBuilder())
+ parser.feed(text)
+ return parser.close()
+
+ def _element_factory(*args, **kwargs):
+ el = etree.Element(*args, **kwargs)
+ for k, v in el.items():
+ if isinstance(v, bytes):
+ el.set(k, v.decode('utf-8'))
+ return el
+
+ def compat_etree_fromstring(text):
+ doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
+ for el in _etree_iter(doc):
+ if el.text is not None and isinstance(el.text, bytes):
+ el.text = el.text.decode('utf-8')
+ return doc
+
+if hasattr(etree, 'register_namespace'):
+ compat_etree_register_namespace = etree.register_namespace
+else:
+ def compat_etree_register_namespace(prefix, uri):
+ """Register a namespace prefix.
+ The registry is global, and any existing mapping for either the
+ given prefix or the namespace URI will be removed.
+ *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+ attributes in this namespace will be serialized with prefix if possible.
+ ValueError is raised if prefix is reserved or is invalid.
+ """
+ if re.match(r"ns\d+$", prefix):
+ raise ValueError("Prefix format reserved for internal use")
+ for k, v in list(etree._namespace_map.items()):
+ if k == uri or v == prefix:
+ del etree._namespace_map[k]
+ etree._namespace_map[uri] = prefix
+
+if sys.version_info < (2, 7):
+ # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+ # .//node does not match if a node is a direct child of . !
+ def compat_xpath(xpath):
+ if isinstance(xpath, compat_str):
+ xpath = xpath.encode('ascii')
+ return xpath
+else:
+ compat_xpath = lambda xpath: xpath
+
+try:
+ from urllib.parse import parse_qs as compat_parse_qs
+except ImportError: # Python 2
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, compat_str
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError('bad query field: %r' % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
+
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
+
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
+ def compat_shlex_quote(s):
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ try:
+ from shlex import quote as compat_shlex_quote
+ except ImportError: # Python < 3.3
+ def compat_shlex_quote(s):
+ if re.match(r'^[-_\w./]+$', s):
+ return s
+ else:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
+try:
+ args = shlex.split('中文')
+ assert (isinstance(args, list) and
+ isinstance(args[0], compat_str) and
+ args[0] == '中文')
+ compat_shlex_split = shlex.split
+except (AssertionError, UnicodeEncodeError):
+ # Working around shlex issue with unicode strings on some python 2
+ # versions (see http://bugs.python.org/issue1548891)
+ def compat_shlex_split(s, comments=False, posix=True):
+ if isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
+
+
+def compat_ord(c):
+ if type(c) is int:
+ return c
+ else:
+ return ord(c)
+
+
+if sys.version_info >= (3, 0):
+ compat_getenv = os.getenv
+ compat_expanduser = os.path.expanduser
+
+ def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+else:
+ # Environment variables should be decoded with filesystem encoding.
+ # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
+
+ def compat_getenv(key, default=None):
+ from .utils import get_filesystem_encoding
+ env = os.getenv(key, default)
+ if env:
+ env = env.decode(get_filesystem_encoding())
+ return env
+
+ def compat_setenv(key, value, env=os.environ):
+ def encode(v):
+ from .utils import get_filesystem_encoding
+ return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
+ env[encode(key)] = encode(value)
+
+ # HACK: The default implementations of os.path.expanduser from cpython do not decode
+ # environment variables with filesystem encoding. We will work around this by
+ # providing adjusted implementations.
+ # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+ # for different platforms with correct environment variables decoding.
+
+ if compat_os_name == 'posix':
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructions. If user or $HOME is unknown,
+ do nothing."""
+ if not path.startswith('~'):
+ return path
+ i = path.find('/', 1)
+ if i < 0:
+ i = len(path)
+ if i == 1:
+ if 'HOME' not in os.environ:
+ import pwd
+ userhome = pwd.getpwuid(os.getuid()).pw_dir
+ else:
+ userhome = compat_getenv('HOME')
+ else:
+ import pwd
+ try:
+ pwent = pwd.getpwnam(path[1:i])
+ except KeyError:
+ return path
+ userhome = pwent.pw_dir
+ userhome = userhome.rstrip('/')
+ return (userhome + path[i:]) or '/'
+ elif compat_os_name in ('nt', 'ce'):
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructs.
+
+ If user or $HOME is unknown, do nothing."""
+ if path[:1] != '~':
+ return path
+ i, n = 1, len(path)
+ while i < n and path[i] not in '/\\':
+ i = i + 1
+
+ if 'HOME' in os.environ:
+ userhome = compat_getenv('HOME')
+ elif 'USERPROFILE' in os.environ:
+ userhome = compat_getenv('USERPROFILE')
+ elif 'HOMEPATH' not in os.environ:
+ return path
+ else:
+ try:
+ drive = compat_getenv('HOMEDRIVE')
+ except KeyError:
+ drive = ''
+ userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+ if i != 1: # ~user
+ userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+ return userhome + path[i:]
+ else:
+ compat_expanduser = os.path.expanduser
+
+
+if sys.version_info < (3, 0):
+ def compat_print(s):
+ from .utils import preferredencoding
+ print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+else:
+ def compat_print(s):
+ assert isinstance(s, compat_str)
+ print(s)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+ def compat_getpass(prompt, *args, **kwargs):
+ if isinstance(prompt, compat_str):
+ from .utils import preferredencoding
+ prompt = prompt.encode(preferredencoding())
+ return getpass.getpass(prompt, *args, **kwargs)
+else:
+ compat_getpass = getpass.getpass
+
+try:
+ compat_input = raw_input
+except NameError: # Python 3
+ compat_input = input
+
+# Python < 2.6.5 require kwargs to be bytes
+try:
+ def _testfunc(x):
+ pass
+ _testfunc(**{'x': 0})
+except TypeError:
+ def compat_kwargs(kwargs):
+ return dict((bytes(k), v) for k, v in kwargs.items())
+else:
+ compat_kwargs = lambda kwargs: kwargs
+
+
+try:
+ compat_numeric_types = (int, float, long, complex)
+except NameError: # Python 3
+ compat_numeric_types = (int, float, complex)
+
+
+try:
+ compat_integer_types = (int, long)
+except NameError: # Python 3
+ compat_integer_types = (int, )
+
+
+if sys.version_info < (2, 7):
+ def compat_socket_create_connection(address, timeout, source_address=None):
+ host, port = address
+ err = None
+ for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+else:
+ compat_socket_create_connection = socket.create_connection
+
+
+# Fix https://github.com/rg3/youtube-dl/issues/4223
+# See http://bugs.python.org/issue9161 for what is broken
+def workaround_optparse_bug9161():
+ op = optparse.OptionParser()
+ og = optparse.OptionGroup(op, 'foo')
+ try:
+ og.add_option('-t')
+ except TypeError:
+ real_add_option = optparse.OptionGroup.add_option
+
+ def _compat_add_option(self, *args, **kwargs):
+ enc = lambda v: (
+ v.encode('ascii', 'replace') if isinstance(v, compat_str)
+ else v)
+ bargs = [enc(a) for a in args]
+ bkwargs = dict(
+ (k, enc(v)) for k, v in kwargs.items())
+ return real_add_option(self, *bargs, **bkwargs)
+ optparse.OptionGroup.add_option = _compat_add_option
+
+
+if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
+ compat_get_terminal_size = shutil.get_terminal_size
+else:
+ _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+ def compat_get_terminal_size(fallback=(80, 24)):
+ columns = compat_getenv('COLUMNS')
+ if columns:
+ columns = int(columns)
+ else:
+ columns = None
+ lines = compat_getenv('LINES')
+ if lines:
+ lines = int(lines)
+ else:
+ lines = None
+
+ if columns is None or lines is None or columns <= 0 or lines <= 0:
+ try:
+ sp = subprocess.Popen(
+ ['stty', 'size'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = sp.communicate()
+ _lines, _columns = map(int, out.split())
+ except Exception:
+ _columns, _lines = _terminal_size(*fallback)
+
+ if columns is None or columns <= 0:
+ columns = _columns
+ if lines is None or lines <= 0:
+ lines = _lines
+ return _terminal_size(columns, lines)
+
+try:
+ itertools.count(start=0, step=1)
+ compat_itertools_count = itertools.count
+except TypeError: # Python 2.6
+ def compat_itertools_count(start=0, step=1):
+ n = start
+ while True:
+ yield n
+ n += step
+
+if sys.version_info >= (3, 0):
+ from tokenize import tokenize as compat_tokenize_tokenize
+else:
+ from tokenize import generate_tokens as compat_tokenize_tokenize
+
+
+try:
+ struct.pack('!I', 0)
+except TypeError:
+ # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
+ # See https://bugs.python.org/issue19099
+ def compat_struct_pack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.pack(spec, *args)
+
+ def compat_struct_unpack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.unpack(spec, *args)
+
+ class compat_Struct(struct.Struct):
+ def __init__(self, fmt):
+ if isinstance(fmt, compat_str):
+ fmt = fmt.encode('ascii')
+ super(compat_Struct, self).__init__(fmt)
+else:
+ compat_struct_pack = struct.pack
+ compat_struct_unpack = struct.unpack
+ if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+ class compat_Struct(struct.Struct):
+ def unpack(self, string):
+ if not isinstance(string, buffer): # noqa: F821
+ string = buffer(string) # noqa: F821
+ return super(compat_Struct, self).unpack(string)
+ else:
+ compat_Struct = struct.Struct
+
+
+try:
+ from future_builtins import zip as compat_zip
+except ImportError: # not 2.6+ or is 3.x
+ try:
+ from itertools import izip as compat_zip # < 2.5 or 3.x
+ except ImportError:
+ compat_zip = zip
+
+
+if sys.version_info < (3, 3):
+ def compat_b64decode(s, *args, **kwargs):
+ if isinstance(s, compat_str):
+ s = s.encode('ascii')
+ return base64.b64decode(s, *args, **kwargs)
+else:
+ compat_b64decode = base64.b64decode
+
+
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+ # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+ # names, see the original PyPy issue [1] and the youtube-dl one [2].
+ # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+ # 2. https://github.com/rg3/youtube-dl/pull/4392
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+ def resf(tpl, *args, **kwargs):
+ funcname, dll = tpl
+ return real((str(funcname), dll), *args, **kwargs)
+
+ return resf
+else:
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+__all__ = [
+ 'compat_HTMLParseError',
+ 'compat_HTMLParser',
+ 'compat_HTTPError',
+ 'compat_Struct',
+ 'compat_b64decode',
+ 'compat_basestring',
+ 'compat_chr',
+ 'compat_cookiejar',
+ 'compat_cookies',
+ 'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_fromstring',
+ 'compat_etree_register_namespace',
+ 'compat_expanduser',
+ 'compat_get_terminal_size',
+ 'compat_getenv',
+ 'compat_getpass',
+ 'compat_html_entities',
+ 'compat_html_entities_html5',
+ 'compat_http_client',
+ 'compat_http_server',
+ 'compat_input',
+ 'compat_integer_types',
+ 'compat_itertools_count',
+ 'compat_kwargs',
+ 'compat_numeric_types',
+ 'compat_ord',
+ 'compat_os_name',
+ 'compat_parse_qs',
+ 'compat_print',
+ 'compat_setenv',
+ 'compat_shlex_quote',
+ 'compat_shlex_split',
+ 'compat_socket_create_connection',
+ 'compat_str',
+ 'compat_struct_pack',
+ 'compat_struct_unpack',
+ 'compat_subprocess_get_DEVNULL',
+ 'compat_tokenize_tokenize',
+ 'compat_urllib_error',
+ 'compat_urllib_parse',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
+ 'compat_urllib_parse_urlencode',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urllib_request',
+ 'compat_urllib_request_DataHandler',
+ 'compat_urllib_response',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+ 'compat_xpath',
+ 'compat_zip',
+ 'workaround_optparse_bug9161',
+]
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
new file mode 100644
index 0000000..2e485df
--- /dev/null
+++ b/youtube_dl/downloader/__init__.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+from .common import FileDownloader
+from .f4m import F4mFD
+from .hls import HlsFD
+from .http import HttpFD
+from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .ism import IsmFD
+from .external import (
+ get_external_downloader,
+ FFmpegFD,
+)
+
+from ..utils import (
+ determine_protocol,
+)
+
+PROTOCOL_MAP = {
+ 'rtmp': RtmpFD,
+ 'm3u8_native': HlsFD,
+ 'm3u8': FFmpegFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
+ 'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
+ 'ism': IsmFD,
+}
+
+
+def get_suitable_downloader(info_dict, params={}):
+ """Get the downloader class that can handle the info dict."""
+ protocol = determine_protocol(info_dict)
+ info_dict['protocol'] = protocol
+
+ # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+ # return FFmpegFD
+
+ external_downloader = params.get('external_downloader')
+ if external_downloader is not None:
+ ed = get_external_downloader(external_downloader)
+ if ed.can_download(info_dict):
+ return ed
+
+ if protocol.startswith('m3u8') and info_dict.get('is_live'):
+ return FFmpegFD
+
+ if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
+ return HlsFD
+
+ if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
+ return FFmpegFD
+
+ return PROTOCOL_MAP.get(protocol, HttpFD)
+
+
+__all__ = [
+ 'get_suitable_downloader',
+ 'FileDownloader',
+]
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
new file mode 100644
index 0000000..5979833
--- /dev/null
+++ b/youtube_dl/downloader/common.py
@@ -0,0 +1,389 @@
+from __future__ import division, unicode_literals
+
+import os
+import re
+import sys
+import time
+import random
+
+from ..compat import compat_os_name
+from ..utils import (
+ decodeArgument,
+ encodeFilename,
+ error_to_compat_str,
+ format_bytes,
+ shell_quote,
+ timeconvert,
+)
+
+
+class FileDownloader(object):
+ """File Downloader class.
+
+ File downloader objects are the ones responsible of downloading the
+ actual video file and writing it to disk.
+
+ File downloaders accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead.
+
+ Available options:
+
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ ratelimit: Download speed limit, in bytes/sec.
+ retries: Number of times to retry for HTTP error 5xx
+ buffersize: Size of download buffer in bytes.
+ noresizebuffer: Do not automatically resize the download buffer.
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ test: Download only first bytes to test the downloader.
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
+ xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
+ hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
+
+ Subclasses of this one must re-define the real_download method.
+ """
+
+ _TEST_FILE_SIZE = 10241
+ params = None
+
+ def __init__(self, ydl, params):
+ """Create a FileDownloader object with the given options."""
+ self.ydl = ydl
+ self._progress_hooks = []
+ self.params = params
+ self.add_progress_hook(self.report_progress)
+
+ @staticmethod
+ def format_seconds(seconds):
+ (mins, secs) = divmod(seconds, 60)
+ (hours, mins) = divmod(mins, 60)
+ if hours > 99:
+ return '--:--:--'
+ if hours == 0:
+ return '%02d:%02d' % (mins, secs)
+ else:
+ return '%02d:%02d:%02d' % (hours, mins, secs)
+
+ @staticmethod
+ def calc_percent(byte_counter, data_len):
+ if data_len is None:
+ return None
+ return float(byte_counter) / float(data_len) * 100.0
+
+ @staticmethod
+ def format_percent(percent):
+ if percent is None:
+ return '---.-%'
+ return '%6s' % ('%3.1f%%' % percent)
+
+ @staticmethod
+ def calc_eta(start, now, total, current):
+ if total is None:
+ return None
+ if now is None:
+ now = time.time()
+ dif = now - start
+ if current == 0 or dif < 0.001: # One millisecond
+ return None
+ rate = float(current) / dif
+ return int((float(total) - float(current)) / rate)
+
+ @staticmethod
+ def format_eta(eta):
+ if eta is None:
+ return '--:--'
+ return FileDownloader.format_seconds(eta)
+
+ @staticmethod
+ def calc_speed(start, now, bytes):
+ dif = now - start
+ if bytes == 0 or dif < 0.001: # One millisecond
+ return None
+ return float(bytes) / dif
+
+ @staticmethod
+ def format_speed(speed):
+ if speed is None:
+ return '%10s' % '---b/s'
+ return '%10s' % ('%s/s' % format_bytes(speed))
+
+ @staticmethod
+ def format_retries(retries):
+ return 'inf' if retries == float('inf') else '%.0f' % retries
+
+ @staticmethod
+ def best_block_size(elapsed_time, bytes):
+ new_min = max(bytes / 2.0, 1.0)
+ new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+ if elapsed_time < 0.001:
+ return int(new_max)
+ rate = bytes / elapsed_time
+ if rate > new_max:
+ return int(new_max)
+ if rate < new_min:
+ return int(new_min)
+ return int(rate)
+
+ @staticmethod
+ def parse_bytes(bytestr):
+ """Parse a string indicating a byte quantity into an integer."""
+ matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+ if matchobj is None:
+ return None
+ number = float(matchobj.group(1))
+ multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+ return int(round(number * multiplier))
+
+ def to_screen(self, *args, **kargs):
+ self.ydl.to_screen(*args, **kargs)
+
+ def to_stderr(self, message):
+ self.ydl.to_screen(message)
+
+ def to_console_title(self, message):
+ self.ydl.to_console_title(message)
+
+ def trouble(self, *args, **kargs):
+ self.ydl.trouble(*args, **kargs)
+
+ def report_warning(self, *args, **kargs):
+ self.ydl.report_warning(*args, **kargs)
+
+ def report_error(self, *args, **kargs):
+ self.ydl.report_error(*args, **kargs)
+
+ def slow_down(self, start_time, now, byte_counter):
+ """Sleep if the download speed is over the rate limit."""
+ rate_limit = self.params.get('ratelimit')
+ if rate_limit is None or byte_counter == 0:
+ return
+ if now is None:
+ now = time.time()
+ elapsed = now - start_time
+ if elapsed <= 0.0:
+ return
+ speed = float(byte_counter) / elapsed
+ if speed > rate_limit:
+ time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
+
+ def temp_name(self, filename):
+ """Returns a temporary filename for the given filename."""
+ if self.params.get('nopart', False) or filename == '-' or \
+ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+ return filename
+ return filename + '.part'
+
+ def undo_temp_name(self, filename):
+ if filename.endswith('.part'):
+ return filename[:-len('.part')]
+ return filename
+
+ def ytdl_filename(self, filename):
+ return filename + '.ytdl'
+
+ def try_rename(self, old_filename, new_filename):
+ try:
+ if old_filename == new_filename:
+ return
+ os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+ except (IOError, OSError) as err:
+ self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+
+ def try_utime(self, filename, last_modified_hdr):
+ """Try to set the last-modified time of the given file."""
+ if last_modified_hdr is None:
+ return
+ if not os.path.isfile(encodeFilename(filename)):
+ return
+ timestr = last_modified_hdr
+ if timestr is None:
+ return
+ filetime = timeconvert(timestr)
+ if filetime is None:
+ return filetime
+ # Ignore obviously invalid dates
+ if filetime == 0:
+ return
+ try:
+ os.utime(filename, (time.time(), filetime))
+ except Exception:
+ pass
+ return filetime
+
+ def report_destination(self, filename):
+ """Report destination filename."""
+ self.to_screen('[download] Destination: ' + filename)
+
+ def _report_progress_status(self, msg, is_last_line=False):
+ fullmsg = '[download] ' + msg
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(fullmsg)
+ else:
+ if compat_os_name == 'nt':
+ prev_len = getattr(self, '_report_progress_prev_line_length',
+ 0)
+ if prev_len > len(fullmsg):
+ fullmsg += ' ' * (prev_len - len(fullmsg))
+ self._report_progress_prev_line_length = len(fullmsg)
+ clear_line = '\r'
+ else:
+ clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
+ self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
+ self.to_console_title('youtube-dl ' + msg)
+
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template += ' in %(_elapsed_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
+ return
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
+ else:
+ s['_eta_str'] = 'Unknown ETA'
+
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
+
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ else:
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+ msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
+
+ def report_resuming_byte(self, resume_len):
+ """Report attempt to resume at given byte."""
+ self.to_screen('[download] Resuming download at byte %s' % resume_len)
+
+ def report_retry(self, err, count, retries):
+ """Report retry in case of HTTP error 5xx"""
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ % (error_to_compat_str(err), count, self.format_retries(retries)))
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def report_unable_to_resume(self):
+ """Report it was impossible to resume download."""
+ self.to_screen('[download] Unable to resume')
+
+ def download(self, filename, info_dict):
+ """Download to a filename using the info from info_dict
+ Return True on success and False otherwise
+ """
+
+ nooverwrites_and_exists = (
+ self.params.get('nooverwrites', False) and
+ os.path.exists(encodeFilename(filename))
+ )
+
+ if not hasattr(filename, 'write'):
+ continuedl_and_exists = (
+ self.params.get('continuedl', True) and
+ os.path.isfile(encodeFilename(filename)) and
+ not self.params.get('nopart', False)
+ )
+
+ # Check file already present
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+ self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
+ })
+ return True
+
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+
+ return self.real_download(filename, info_dict)
+
+ def real_download(self, filename, info_dict):
+ """Real download process. Redefine in subclasses."""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _hook_progress(self, status):
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ # See YoutubeDl.py (search for progress_hooks) for a description of
+ # this interface
+ self._progress_hooks.append(ph)
+
+ def _debug_cmd(self, args, exe=None):
+ if not self.params.get('verbose', False):
+ return
+
+ str_args = [decodeArgument(a) for a in args]
+
+ if exe is None:
+ exe = os.path.basename(str_args[0])
+
+ self.to_screen('[debug] %s command line: %s' % (
+ exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 0000000..eaa7adf
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+ DownloadError,
+ urljoin,
+)
+
+
+class DashSegmentsFD(FragmentFD):
+ """
+ Download segments in a DASH manifest
+ """
+
+ FD_NAME = 'dashsegments'
+
+ def real_download(self, filename, info_dict):
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(fragments),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ frag_index = 0
+ for i, fragment in enumerate(fragments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ # In DASH, the first segment contains necessary headers to
+ # generate a valid MP4 file, so always abort for the first segment
+ fatal = i == 0 or not skip_unavailable_fragments
+ count = 0
+ while count <= fragment_retries:
+ try:
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ if not success:
+ return False
+ self._append_fragment(ctx, frag_content)
+ break
+ except compat_urllib_error.HTTPError as err:
+ # YouTube may often return 404 HTTP error for a fragment causing the
+ # whole download to fail. However if the same fragment is immediately
+ # retried with the same request data this usually succeeds (1-2 attemps
+ # is usually enough) thus allowing to download the whole file successfully.
+ # To be future-proof we will retry all fragments that fail with any
+ # HTTP error.
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ except DownloadError:
+ # Don't retry fragment if error occurred during HTTP downloading
+ # itself since it has own retry settings
+ if not fatal:
+ self.report_skip_fragment(frag_index)
+ break
+ raise
+
+ if count > fragment_retries:
+ if not fatal:
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
new file mode 100644
index 0000000..958d00a
--- /dev/null
+++ b/youtube_dl/downloader/external.py
@@ -0,0 +1,354 @@
+from __future__ import unicode_literals
+
+import os.path
+import re
+import subprocess
+import sys
+import time
+
+from .common import FileDownloader
+from ..compat import (
+ compat_setenv,
+ compat_str,
+)
+from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+from ..utils import (
+ cli_option,
+ cli_valueless_option,
+ cli_bool_option,
+ cli_configuration_args,
+ encodeFilename,
+ encodeArgument,
+ handle_youtubedl_headers,
+ check_executable,
+ is_outdated_version,
+)
+
+
+class ExternalFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+
+ try:
+ started = time.time()
+ retval = self._call_downloader(tmpfilename, info_dict)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ # Live stream downloading cancellation should be considered as
+ # correct and expected termination thus all postprocessing
+ # should take place
+ retval = 0
+ self.to_screen('[%s] Interrupted by user' % self.get_basename())
+
+ if retval == 0:
+ status = {
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ }
+ if filename != '-':
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+ self.try_rename(tmpfilename, filename)
+ status.update({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ })
+ self._hook_progress(status)
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (
+ self.get_basename(), retval))
+ return False
+
+ @classmethod
+ def get_basename(cls):
+ return cls.__name__[:-2].lower()
+
+ @property
+ def exe(self):
+ return self.params.get('external_downloader')
+
+ @classmethod
+ def available(cls):
+ return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+
+ @classmethod
+ def supports(cls, info_dict):
+ return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
+
+ @classmethod
+ def can_download(cls, info_dict):
+ return cls.available() and cls.supports(info_dict)
+
+ def _option(self, command_option, param):
+ return cli_option(self.params, command_option, param)
+
+ def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
+ return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
+
+ def _valueless_option(self, command_option, param, expected_value=True):
+ return cli_valueless_option(self.params, command_option, param, expected_value)
+
+ def _configuration_args(self, default=[]):
+ return cli_configuration_args(self.params, 'external_downloader_args', default)
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ """ Either overwrite this or implement _make_cmd """
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
+
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ if p.returncode != 0:
+ self.to_stderr(stderr.decode('utf-8', 'replace'))
+ return p.returncode
+
+
+class CurlFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '--location', '-o', tmpfilename]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
+ cmd += self._valueless_option('--silent', 'noprogress')
+ cmd += self._valueless_option('--verbose', 'verbose')
+ cmd += self._option('--limit-rate', 'ratelimit')
+ cmd += self._option('--retry', 'retries')
+ cmd += self._option('--max-filesize', 'max_filesize')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--insecure', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
+
+ # curl writes the progress to stderr so don't capture it.
+ p = subprocess.Popen(cmd)
+ p.communicate()
+ return p.returncode
+
+
+class AxelFD(ExternalFD):
+ AVAILABLE_OPT = '-V'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-o', tmpfilename]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', '%s: %s' % (key, val)]
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class WgetFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--bind-address', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class Aria2cFD(ExternalFD):
+ AVAILABLE_OPT = '-v'
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-c']
+ cmd += self._configuration_args([
+ '--min-split-size', '1M', '--max-connection-per-server', '4'])
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ cmd += ['--dir', dn]
+ cmd += ['--out', os.path.basename(tmpfilename)]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--all-proxy', 'proxy')
+ cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class HttpieFD(ExternalFD):
+ @classmethod
+ def available(cls):
+ return check_executable('http', ['--version'])
+
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
+ return cmd
+
+
+class FFmpegFD(ExternalFD):
+ @classmethod
+ def supports(cls, info_dict):
+ return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+
+ @classmethod
+ def available(cls):
+ return FFmpegPostProcessor().available
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ url = info_dict['url']
+ ffpp = FFmpegPostProcessor(downloader=self)
+ if not ffpp.available:
+ self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ return False
+ ffpp.check_version()
+
+ args = [ffpp.executable, '-y']
+
+ for log_level in ('quiet', 'verbose'):
+ if self.params.get(log_level, False):
+ args += ['-loglevel', log_level]
+ break
+
+ seekable = info_dict.get('_seekable')
+ if seekable is not None:
+ # setting -seekable prevents ffmpeg from guessing if the server
+ # supports seeking(by adding the header `Range: bytes=0-`), which
+ # can cause problems in some cases
+ # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
+ # http://trac.ffmpeg.org/ticket/6125#comment:10
+ args += ['-seekable', '1' if seekable else '0']
+
+ args += self._configuration_args()
+
+ # start_time = info_dict.get('start_time') or 0
+ # if start_time:
+ # args += ['-ss', compat_str(start_time)]
+ # end_time = info_dict.get('end_time')
+ # if end_time:
+ # args += ['-t', compat_str(end_time - start_time)]
+
+ if info_dict['http_headers'] and re.match(r'^https?://', url):
+ # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+ headers = handle_youtubedl_headers(info_dict['http_headers'])
+ args += [
+ '-headers',
+ ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+ env = None
+ proxy = self.params.get('proxy')
+ if proxy:
+ if not re.match(r'^[\da-zA-Z]+://', proxy):
+ proxy = 'http://%s' % proxy
+
+ if proxy.startswith('socks'):
+ self.report_warning(
+ '%s does not support SOCKS proxies. Downloading is likely to fail. '
+ 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
+
+ # Since December 2015 ffmpeg supports -http_proxy option (see
+ # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
+ # We could switch to the following code if we are able to detect version properly
+ # args += ['-http_proxy', proxy]
+ env = os.environ.copy()
+ compat_setenv('HTTP_PROXY', proxy, env=env)
+ compat_setenv('http_proxy', proxy, env=env)
+
+ protocol = info_dict.get('protocol')
+
+ if protocol == 'rtmp':
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ if player_url is not None:
+ args += ['-rtmp_swfverify', player_url]
+ if page_url is not None:
+ args += ['-rtmp_pageurl', page_url]
+ if app is not None:
+ args += ['-rtmp_app', app]
+ if play_path is not None:
+ args += ['-rtmp_playpath', play_path]
+ if tc_url is not None:
+ args += ['-rtmp_tcurl', tc_url]
+ if flash_version is not None:
+ args += ['-rtmp_flashver', flash_version]
+ if live:
+ args += ['-rtmp_live', 'live']
+
+ args += ['-i', url, '-c', 'copy']
+
+ if self.params.get('test', False):
+ args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+
+ if protocol in ('m3u8', 'm3u8_native'):
+ if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+ args += ['-f', 'mpegts']
+ else:
+ args += ['-f', 'mp4']
+ if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+ args += ['-bsf:a', 'aac_adtstoasc']
+ elif protocol == 'rtmp':
+ args += ['-f', 'flv']
+ else:
+ args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+
+ args = [encodeArgument(opt) for opt in args]
+ args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+ self._debug_cmd(args)
+
+ proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+ try:
+ retval = proc.wait()
+ except KeyboardInterrupt:
+ # subprocces.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/rg3/youtube-dl/issues/8300).
+ if sys.platform != 'win32':
+ proc.communicate(b'q')
+ raise
+ return retval
+
+
+class AVconvFD(FFmpegFD):
+ pass
+
+
+_BY_NAME = dict(
+ (klass.get_basename(), klass)
+ for name, klass in globals().items()
+ if name.endswith('FD') and name != 'ExternalFD'
+)
+
+
+def list_external_downloaders():
+ return sorted(_BY_NAME.keys())
+
+
+def get_external_downloader(external_downloader):
+ """ Given the name of the executable, see whether we support the given
+ downloader . """
+ # Drop .exe extension on Windows
+ bn = os.path.splitext(os.path.basename(external_downloader))[0]
+ return _BY_NAME[bn]
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
new file mode 100644
index 0000000..15e71be
--- /dev/null
+++ b/youtube_dl/downloader/f4m.py
@@ -0,0 +1,438 @@
+from __future__ import division, unicode_literals
+
+import io
+import itertools
+import time
+
+from .fragment import FragmentFD
+from ..compat import (
+ compat_b64decode,
+ compat_etree_fromstring,
+ compat_urlparse,
+ compat_urllib_error,
+ compat_urllib_parse_urlparse,
+ compat_struct_pack,
+ compat_struct_unpack,
+)
+from ..utils import (
+ fix_xml_ampersands,
+ xpath_text,
+)
+
+
+class DataTruncatedError(Exception):
+ pass
+
+
+class FlvReader(io.BytesIO):
+ """
+ Reader for Flv files
+ The file format is documented in https://www.adobe.com/devnet/f4v.html
+ """
+
+ def read_bytes(self, n):
+ data = self.read(n)
+ if len(data) < n:
+ raise DataTruncatedError(
+ 'FlvReader error: need %d bytes while only %d bytes got' % (
+ n, len(data)))
+ return data
+
+ # Utility functions for reading numbers and strings
+ def read_unsigned_long_long(self):
+ return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+
+ def read_unsigned_int(self):
+ return compat_struct_unpack('!I', self.read_bytes(4))[0]
+
+ def read_unsigned_char(self):
+ return compat_struct_unpack('!B', self.read_bytes(1))[0]
+
+ def read_string(self):
+ res = b''
+ while True:
+ char = self.read_bytes(1)
+ if char == b'\x00':
+ break
+ res += char
+ return res
+
+ def read_box_info(self):
+ """
+ Read a box and return the info as a tuple: (box_size, box_type, box_data)
+ """
+ real_size = size = self.read_unsigned_int()
+ box_type = self.read_bytes(4)
+ header_end = 8
+ if size == 1:
+ real_size = self.read_unsigned_long_long()
+ header_end = 16
+ return real_size, box_type, self.read_bytes(real_size - header_end)
+
+ def read_asrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ quality_entry_count = self.read_unsigned_char()
+ # QualityEntryCount
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ segment_run_count = self.read_unsigned_int()
+ segments = []
+ for i in range(segment_run_count):
+ first_segment = self.read_unsigned_int()
+ fragments_per_segment = self.read_unsigned_int()
+ segments.append((first_segment, fragments_per_segment))
+
+ return {
+ 'segment_run': segments,
+ }
+
+ def read_afrt(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+ # time scale
+ self.read_unsigned_int()
+
+ quality_entry_count = self.read_unsigned_char()
+ # QualitySegmentUrlModifiers
+ for i in range(quality_entry_count):
+ self.read_string()
+
+ fragments_count = self.read_unsigned_int()
+ fragments = []
+ for i in range(fragments_count):
+ first = self.read_unsigned_int()
+ first_ts = self.read_unsigned_long_long()
+ duration = self.read_unsigned_int()
+ if duration == 0:
+ discontinuity_indicator = self.read_unsigned_char()
+ else:
+ discontinuity_indicator = None
+ fragments.append({
+ 'first': first,
+ 'ts': first_ts,
+ 'duration': duration,
+ 'discontinuity_indicator': discontinuity_indicator,
+ })
+
+ return {
+ 'fragments': fragments,
+ }
+
+ def read_abst(self):
+ # version
+ self.read_unsigned_char()
+ # flags
+ self.read_bytes(3)
+
+ self.read_unsigned_int() # BootstrapinfoVersion
+ # Profile,Live,Update,Reserved
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
+ # time scale
+ self.read_unsigned_int()
+ # CurrentMediaTime
+ self.read_unsigned_long_long()
+ # SmpteTimeCodeOffset
+ self.read_unsigned_long_long()
+
+ self.read_string() # MovieIdentifier
+ server_count = self.read_unsigned_char()
+ # ServerEntryTable
+ for i in range(server_count):
+ self.read_string()
+ quality_count = self.read_unsigned_char()
+ # QualityEntryTable
+ for i in range(quality_count):
+ self.read_string()
+ # DrmData
+ self.read_string()
+ # MetaData
+ self.read_string()
+
+ segments_count = self.read_unsigned_char()
+ segments = []
+ for i in range(segments_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'asrt'
+ segment = FlvReader(box_data).read_asrt()
+ segments.append(segment)
+ fragments_run_count = self.read_unsigned_char()
+ fragments = []
+ for i in range(fragments_run_count):
+ box_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'afrt'
+ fragments.append(FlvReader(box_data).read_afrt())
+
+ return {
+ 'segments': segments,
+ 'fragments': fragments,
+ 'live': live,
+ }
+
+ def read_bootstrap_info(self):
+ total_size, box_type, box_data = self.read_box_info()
+ assert box_type == b'abst'
+ return FlvReader(box_data).read_abst()
+
+
+def read_bootstrap_info(bootstrap_bytes):
+ return FlvReader(bootstrap_bytes).read_bootstrap_info()
+
+
+def build_fragments_list(boot_info):
+ """ Return a list of (segment, fragment) for each fragment in the video """
+ res = []
+ segment_run_table = boot_info['segments'][0]
+ fragment_run_entry_table = boot_info['fragments'][0]['fragments']
+ first_frag_number = fragment_run_entry_table[0]['first']
+ fragments_counter = itertools.count(first_frag_number)
+ for segment, fragments_count in segment_run_table['segment_run']:
+ # In some live HDS streams (for example Rai), `fragments_count` is
+ # abnormal and causing out-of-memory errors. It's OK to change the
+ # number of fragments for live streams as they are updated periodically
+ if fragments_count == 4294967295 and boot_info['live']:
+ fragments_count = 2
+ for _ in range(fragments_count):
+ res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
+ return res
+
+
+def write_unsigned_int(stream, val):
+ stream.write(compat_struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+ stream.write(compat_struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+ """Writes the FLV header to stream"""
+ # FLV header
+ stream.write(b'FLV\x01')
+ stream.write(b'\x05')
+ stream.write(b'\x00\x00\x00\x09')
+ stream.write(b'\x00\x00\x00\x00')
+
+
+def write_metadata_tag(stream, metadata):
+ """Writes optional metadata tag to stream"""
+ SCRIPT_TAG = b'\x12'
+ FLV_TAG_HEADER_LEN = 11
+
+ if metadata:
+ stream.write(SCRIPT_TAG)
+ write_unsigned_int_24(stream, len(metadata))
+ stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+ stream.write(metadata)
+ write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
+
+
+def remove_encrypted_media(media):
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+ 'drmAdditionalHeaderSetId' not in e.attrib,
+ media))
+
+
+def _add_ns(prop, ver=1):
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
+
+
+class F4mFD(FragmentFD):
+ """
+ A downloader for f4m manifests or AdobeHDS.
+ """
+
+ FD_NAME = 'f4m'
+
+ def _get_unencrypted_media(self, doc):
+ media = doc.findall(_add_ns('media'))
+ if not media:
+ self.report_error('No media found')
+ for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
+ doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ # If id attribute is missing it's valid for all media nodes
+ # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+ if 'id' not in e.attrib:
+ self.report_error('Missing ID in f4m DRM')
+ media = remove_encrypted_media(media)
+ if not media:
+ self.report_error('Unsupported DRM')
+ return media
+
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ # Sometimes non empty inline bootstrap info can be specified along
+ # with bootstrap url attribute (e.g. dummy inline bootstrap info
+ # contains whitespace characters in [1]). We will prefer bootstrap
+ # url over inline bootstrap info when present.
+ # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
+ bootstrap_url = node.get('url')
+ if bootstrap_url:
+ bootstrap_url = compat_urlparse.urljoin(
+ base_url, bootstrap_url)
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = compat_b64decode(node.text)
+ boot_info = read_bootstrap_info(bootstrap)
+ return boot_info, bootstrap_url
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ requested_bitrate = info_dict.get('tbr')
+ self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/rg3/youtube-dl/issues/7823)
+ manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
+
+ doc = compat_etree_fromstring(manifest)
+ formats = [(int(f.attrib.get('bitrate', -1)), f)
+ for f in self._get_unencrypted_media(doc)]
+ if requested_bitrate is None or len(formats) == 1:
+ # get the best format
+ formats = sorted(formats, key=lambda f: f[0])
+ rate, media = formats[-1]
+ else:
+ rate, media = list(filter(
+ lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+ bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
+ live = boot_info['live']
+ metadata_node = media.find(_add_ns('metadata'))
+ if metadata_node is not None:
+ metadata = compat_b64decode(metadata_node.text)
+ else:
+ metadata = None
+
+ fragments_list = build_fragments_list(boot_info)
+ test = self.params.get('test', False)
+ if test:
+ # We only download the first fragment
+ fragments_list = fragments_list[:1]
+ total_frags = len(fragments_list)
+ # For some akamai manifests we'll need to add a query to the fragment url
+ akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': total_frags,
+ 'live': live,
+ }
+
+ self._prepare_frag_download(ctx)
+
+ dest_stream = ctx['dest_stream']
+
+ if ctx['complete_frags_downloaded_bytes'] == 0:
+ write_flv_header(dest_stream)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
+
+ base_url_parsed = compat_urllib_parse_urlparse(base_url)
+
+ self._start_frag_download(ctx)
+
+ frag_index = 0
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ name = 'Seg%d-Frag%d' % (seg_i, frag_i)
+ query = []
+ if base_url_parsed.query:
+ query.append(base_url_parsed.query)
+ if akamai_pv:
+ query.append(akamai_pv.strip(';'))
+ if info_dict.get('extra_param_to_segment_url'):
+ query.append(info_dict['extra_param_to_segment_url'])
+ url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
+ try:
+ success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ if not success:
+ return False
+ reader = FlvReader(down_data)
+ while True:
+ try:
+ _, box_type, box_data = reader.read_box_info()
+ except DataTruncatedError:
+ if test:
+ # In tests, segments may be truncated, and thus
+ # FlvReader may not be able to parse the whole
+ # chunk. If so, write the segment as is
+ # See https://github.com/rg3/youtube-dl/issues/9214
+ dest_stream.write(down_data)
+ break
+ raise
+ if box_type == b'mdat':
+ self._append_fragment(ctx, box_data)
+ break
+ except (compat_urllib_error.HTTPError, ) as err:
+ if live and (err.code == 404 or err.code == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and not test and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
new file mode 100644
index 0000000..917f6dc
--- /dev/null
+++ b/youtube_dl/downloader/fragment.py
@@ -0,0 +1,268 @@
+from __future__ import division, unicode_literals
+
+import os
+import time
+import json
+
+from .common import FileDownloader
+from .http import HttpFD
+from ..utils import (
+ error_to_compat_str,
+ encodeFilename,
+ sanitize_open,
+ sanitized_Request,
+)
+
+
+class HttpQuietDownloader(HttpFD):
+ def to_screen(self, *args, **kargs):
+ pass
+
+
+class FragmentFD(FileDownloader):
+ """
+ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+ Available options:
+
+ fragment_retries: Number of times to retry a fragment for HTTP error (DASH
+ and hlsnative only)
+ skip_unavailable_fragments:
+ Skip unavailable fragments (DASH and hlsnative only)
+ keep_fragments: Keep downloaded fragments on disk after downloading is
+ finished
+
+ For each incomplete fragment download youtube-dl keeps on disk a special
+ bookkeeping file with download state and metadata (in future such files will
+ be used for any incomplete download handled by youtube-dl). This file is
+ used to properly handle resuming, check download file consistency and detect
+ potential errors. The file has a .ytdl extension and represents a standard
+ JSON file of the following format:
+
+ extractor:
+ Dictionary of extractor related data. TBD.
+
+ downloader:
+ Dictionary of downloader related data. May contain following data:
+ current_fragment:
+ Dictionary with current (being downloaded) fragment data:
+ index: 0-based index of current fragment among all fragments
+ fragment_count:
+ Total count of fragments
+
+ This feature is experimental and file format may change in future.
+ """
+
+ def report_retry_fragment(self, err, frag_index, count, retries):
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+ % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+
+ def report_skip_fragment(self, frag_index):
+ self.to_screen('[download] Skipping fragment %d...' % frag_index)
+
+ def _prepare_url(self, info_dict, url):
+ headers = info_dict.get('http_headers')
+ return sanitized_Request(url, None, headers) if headers else url
+
+ def _prepare_and_start_frag_download(self, ctx):
+ self._prepare_frag_download(ctx)
+ self._start_frag_download(ctx)
+
+ @staticmethod
+ def __do_ytdl_file(ctx):
+ return not ctx['live'] and not ctx['tmpfilename'] == '-'
+
+ def _read_ytdl_file(self, ctx):
+ assert 'ytdl_corrupt' not in ctx
+ stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ try:
+ ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ except Exception:
+ ctx['ytdl_corrupt'] = True
+ finally:
+ stream.close()
+
+ def _write_ytdl_file(self, ctx):
+ frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ frag_index_stream.close()
+
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+ success = ctx['dl'].download(fragment_filename, {
+ 'url': frag_url,
+ 'http_headers': headers or info_dict.get('http_headers'),
+ })
+ if not success:
+ return False, None
+ down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+ ctx['fragment_filename_sanitized'] = frag_sanitized
+ frag_content = down.read()
+ down.close()
+ return True, frag_content
+
+ def _append_fragment(self, ctx, frag_content):
+ try:
+ ctx['dest_stream'].write(frag_content)
+ ctx['dest_stream'].flush()
+ finally:
+ if self.__do_ytdl_file(ctx):
+ self._write_ytdl_file(ctx)
+ if not self.params.get('keep_fragments', False):
+ os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+ del ctx['fragment_filename_sanitized']
+
+ def _prepare_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.report_destination(ctx['filename'])
+ dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit'),
+ 'retries': self.params.get('retries', 0),
+ 'nopart': self.params.get('nopart', False),
+ 'test': self.params.get('test', False),
+ }
+ )
+ tmpfilename = self.temp_name(ctx['filename'])
+ open_mode = 'wb'
+ resume_len = 0
+
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(tmpfilename)):
+ open_mode = 'ab'
+ resume_len = os.path.getsize(encodeFilename(tmpfilename))
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
+ })
+
+ if self.__do_ytdl_file(ctx):
+ if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+ self._read_ytdl_file(ctx)
+ is_corrupt = ctx.get('ytdl_corrupt') is True
+ is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+ if is_corrupt or is_inconsistent:
+ message = (
+ '.ytdl file is corrupt' if is_corrupt else
+ 'Inconsistent state of incomplete fragment download')
+ self.report_warning(
+ '%s. Restarting from the beginning...' % message)
+ ctx['fragment_index'] = resume_len = 0
+ if 'ytdl_corrupt' in ctx:
+ del ctx['ytdl_corrupt']
+ self._write_ytdl_file(ctx)
+ else:
+ self._write_ytdl_file(ctx)
+ assert ctx['fragment_index'] == 0
+
+ dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+
+ ctx.update({
+ 'dl': dl,
+ 'dest_stream': dest_stream,
+ 'tmpfilename': tmpfilename,
+ # Total complete fragments downloaded so far in bytes
+ 'complete_frags_downloaded_bytes': resume_len,
+ })
+
+ def _start_frag_download(self, ctx):
+ total_frags = ctx['total_frags']
+ # This dict stores the download progress, it's updated by the progress
+ # hook
+ state = {
+ 'status': 'downloading',
+ 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+ 'fragment_index': ctx['fragment_index'],
+ 'fragment_count': total_frags,
+ 'filename': ctx['filename'],
+ 'tmpfilename': ctx['tmpfilename'],
+ }
+
+ start = time.time()
+ ctx.update({
+ 'started': start,
+ # Amount of fragment's bytes downloaded by the time of the previous
+ # frag progress hook invocation
+ 'prev_frag_downloaded_bytes': 0,
+ })
+
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ time_now = time.time()
+ state['elapsed'] = time_now - start
+ frag_total_bytes = s.get('total_bytes') or 0
+ if not ctx['live']:
+ estimated_size = (
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
+ (state['fragment_index'] + 1) * total_frags)
+ state['total_bytes_estimate'] = estimated_size
+
+ if s['status'] == 'finished':
+ state['fragment_index'] += 1
+ ctx['fragment_index'] = state['fragment_index']
+ state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+ ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['prev_frag_downloaded_bytes'] = 0
+ else:
+ frag_downloaded_bytes = s['downloaded_bytes']
+ state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+ if not ctx['live']:
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size,
+ state['downloaded_bytes'])
+ state['speed'] = s.get('speed') or ctx.get('speed')
+ ctx['speed'] = state['speed']
+ ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
+ self._hook_progress(state)
+
+ ctx['dl'].add_progress_hook(frag_progress_hook)
+
+ return start
+
+ def _finish_frag_download(self, ctx):
+ ctx['dest_stream'].close()
+ if self.__do_ytdl_file(ctx):
+ ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+ if os.path.isfile(ytdl_filename):
+ os.remove(ytdl_filename)
+ elapsed = time.time() - ctx['started']
+
+ if ctx['tmpfilename'] == '-':
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ else:
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_bytes,
+ 'total_bytes': downloaded_bytes,
+ 'filename': ctx['filename'],
+ 'status': 'finished',
+ 'elapsed': elapsed,
+ })
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
new file mode 100644
index 0000000..fd30452
--- /dev/null
+++ b/youtube_dl/downloader/hls.py
@@ -0,0 +1,204 @@
+from __future__ import unicode_literals
+
+import re
+import binascii
+try:
+ from Crypto.Cipher import AES
+ can_decrypt_frag = True
+except ImportError:
+ can_decrypt_frag = False
+
+from .fragment import FragmentFD
+from .external import FFmpegFD
+
+from ..compat import (
+ compat_urllib_error,
+ compat_urlparse,
+ compat_struct_pack,
+)
+from ..utils import (
+ parse_m3u8_attributes,
+ update_url_query,
+)
+
+
+class HlsFD(FragmentFD):
+ """ A limited implementation that does not require ffmpeg """
+
+ FD_NAME = 'hlsnative'
+
+ @staticmethod
+ def can_download(manifest, info_dict):
+ UNSUPPORTED_FEATURES = (
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
+
+ # Live streams heuristic does not always work (e.g. geo restricted to Germany
+ # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+ # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
+
+ # This heuristic also is not correct since segments may not be appended as well.
+ # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
+ # no segments will definitely be appended to the end of the playlist.
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
+ # # event media playlists [4]
+
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+ # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+ # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+ )
+ check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+ is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
+ check_results.append(can_decrypt_frag or not is_aes128_enc)
+ check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
+ check_results.append(not info_dict.get('is_live'))
+ return all(check_results)
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ s = urlh.read().decode('utf-8', 'ignore')
+
+ if not self.can_download(s, info_dict):
+ if info_dict.get('extra_param_to_segment_url'):
+ self.report_error('pycrypto not found. Please install it.')
+ return False
+ self.report_warning(
+ 'hlsnative has detected features it does not support, '
+ 'extraction will be delegated to ffmpeg')
+ fd = FFmpegFD(self.ydl, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ def is_ad_fragment(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
+ s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+
+ media_frags = 0
+ ad_frags = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('#'):
+ if is_ad_fragment(line):
+ ad_frags += 1
+ ad_frag_next = True
+ continue
+ if ad_frag_next:
+ ad_frag_next = False
+ continue
+ media_frags += 1
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': media_frags,
+ 'ad_frags': ad_frags,
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ test = self.params.get('test', False)
+
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ i = 0
+ media_sequence = 0
+ decrypt_info = {'METHOD': 'NONE'}
+ byte_range = {}
+ frag_index = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if line:
+ if not line.startswith('#'):
+ if ad_frag_next:
+ ad_frag_next = False
+ continue
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ frag_url = (
+ line
+ if re.match(r'^https?://', line)
+ else compat_urlparse.urljoin(man_url, line))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+ count = 0
+ headers = info_dict.get('http_headers', {})
+ if byte_range:
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(
+ ctx, frag_url, info_dict, headers)
+ if not success:
+ return False
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we try to retry then either skip or abort.
+ # See https://github.com/rg3/youtube-dl/issues/10165,
+ # https://github.com/rg3/youtube-dl/issues/10448).
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ i += 1
+ media_sequence += 1
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error(
+ 'giving up after %s fragment retries' % fragment_retries)
+ return False
+ if decrypt_info['METHOD'] == 'AES-128':
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+ self._prepare_url(info_dict, decrypt_info['URI'])).read()
+ frag_content = AES.new(
+ decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+ self._append_fragment(ctx, frag_content)
+ # We only download the first fragment during the test
+ if test:
+ break
+ i += 1
+ media_sequence += 1
+ elif line.startswith('#EXT-X-KEY'):
+ decrypt_url = decrypt_info.get('URI')
+ decrypt_info = parse_m3u8_attributes(line[11:])
+ if decrypt_info['METHOD'] == 'AES-128':
+ if 'IV' in decrypt_info:
+ decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
+ if not re.match(r'^https?://', decrypt_info['URI']):
+ decrypt_info['URI'] = compat_urlparse.urljoin(
+ man_url, decrypt_info['URI'])
+ if extra_query:
+ decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
+ if decrypt_url != decrypt_info['URI']:
+ decrypt_info['KEY'] = None
+ elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+ media_sequence = int(line[22:])
+ elif line.startswith('#EXT-X-BYTERANGE'):
+ splitted_byte_range = line[17:].split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+ elif is_ad_fragment(line):
+ ad_frag_next = True
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
new file mode 100644
index 0000000..5b1e960
--- /dev/null
+++ b/youtube_dl/downloader/http.py
@@ -0,0 +1,354 @@
+from __future__ import unicode_literals
+
+import errno
+import os
+import socket
+import time
+import random
+import re
+
+from .common import FileDownloader
+from ..compat import (
+ compat_str,
+ compat_urllib_error,
+)
+from ..utils import (
+ ContentTooShortError,
+ encodeFilename,
+ int_or_none,
+ sanitize_open,
+ sanitized_Request,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+)
+
+
+class HttpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+
+ class DownloadContext(dict):
+ __getattr__ = dict.get
+ __setattr__ = dict.__setitem__
+ __delattr__ = dict.__delitem__
+
+ ctx = DownloadContext()
+ ctx.filename = filename
+ ctx.tmpfilename = self.temp_name(filename)
+ ctx.stream = None
+
+ # Do not include the Accept-Encoding header
+ headers = {'Youtubedl-no-compression': 'True'}
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ headers.update(add_headers)
+
+ is_test = self.params.get('test', False)
+ chunk_size = self._TEST_FILE_SIZE if is_test else (
+ info_dict.get('downloader_options', {}).get('http_chunk_size') or
+ self.params.get('http_chunk_size') or 0)
+
+ ctx.open_mode = 'wb'
+ ctx.resume_len = 0
+ ctx.data_len = None
+ ctx.block_size = self.params.get('buffersize', 1024)
+ ctx.start_time = time.time()
+ ctx.chunk_size = None
+
+ if self.params.get('continuedl', True):
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+ ctx.resume_len = os.path.getsize(
+ encodeFilename(ctx.tmpfilename))
+
+ ctx.is_resume = ctx.resume_len > 0
+
+ count = 0
+ retries = self.params.get('retries', 0)
+
+ class SucceedDownload(Exception):
+ pass
+
+ class RetryDownload(Exception):
+ def __init__(self, source_error):
+ self.source_error = source_error
+
+ class NextFragment(Exception):
+ pass
+
+ def set_range(req, start, end):
+ range_header = 'bytes=%d-' % start
+ if end:
+ range_header += compat_str(end)
+ req.add_header('Range', range_header)
+
+ def establish_connection():
+ ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+ if not is_test and chunk_size else chunk_size)
+ if ctx.resume_len > 0:
+ range_start = ctx.resume_len
+ if ctx.is_resume:
+ self.report_resuming_byte(ctx.resume_len)
+ ctx.open_mode = 'ab'
+ elif ctx.chunk_size > 0:
+ range_start = 0
+ else:
+ range_start = None
+ ctx.is_resume = False
+ range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+ if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
+ range_end = ctx.data_len - 1
+ has_range = range_start is not None
+ ctx.has_range = has_range
+ request = sanitized_Request(url, None, headers)
+ if has_range:
+ set_range(request, range_start, range_end)
+ # Establish connection
+ try:
+ ctx.data = self.ydl.urlopen(request)
+ # When trying to resume, Content-Range HTTP header of response has to be checked
+ # to match the value of requested Range HTTP header. This is due to a webservers
+ # that don't support resuming and serve a whole file with no Content-Range
+ # set in response despite of requested Range (see
+ # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+ if has_range:
+ content_range = ctx.data.headers.get('Content-Range')
+ if content_range:
+ content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+ # Content-Range is present and matches requested Range, resume is possible
+ if content_range_m:
+ if range_start == int(content_range_m.group(1)):
+ content_range_end = int_or_none(content_range_m.group(2))
+ content_len = int_or_none(content_range_m.group(3))
+ accept_content_len = (
+ # Non-chunked download
+ not ctx.chunk_size or
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ content_range_end == range_end or
+ content_len < range_end)
+ if accept_content_len:
+ ctx.data_len = content_len
+ return
+ # Content-Range is either not present or invalid. Assuming remote webserver is
+ # trying to send the whole file, resume is not possible, so wiping the local file
+ # and performing entire redownload
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
+ return
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code == 416:
+ # Unable to resume (requested range not satisfiable)
+ try:
+ # Open the connection again without the range header
+ ctx.data = self.ydl.urlopen(
+ sanitized_Request(url, None, headers))
+ content_length = ctx.data.info()['Content-Length']
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code < 500 or err.code >= 600:
+ raise
+ else:
+ # Examine the reported length
+ if (content_length is not None and
+ (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ # The file had already been fully downloaded.
+ # Explanation to the above condition: in issue #175 it was revealed that
+ # YouTube sometimes adds or removes a few bytes from the end of the file,
+ # changing the file size slightly and causing problems for some users. So
+ # I decided to implement a suggested change and consider the file
+ # completely downloaded if the file size differs less than 100 bytes from
+ # the one in the hard drive.
+ self.report_file_already_downloaded(ctx.filename)
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+ self._hook_progress({
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'downloaded_bytes': ctx.resume_len,
+ 'total_bytes': ctx.resume_len,
+ })
+ raise SucceedDownload()
+ else:
+ # The length does not match, we start the download over
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
+ elif err.code < 500 or err.code >= 600:
+ # Unexpected HTTP error
+ raise
+ raise RetryDownload(err)
+ except socket.error as err:
+ if err.errno != errno.ECONNRESET:
+ # Connection reset is no problem, just retry
+ raise
+ raise RetryDownload(err)
+
+ def download():
+ data_len = ctx.data.info().get('Content-length', None)
+
+ # Range HTTP header may be ignored/unsupported by a webserver
+ # (e.g. extractor/scivee.py, extractor/bambuser.py).
+ # However, for a test we still would like to download just a piece of a file.
+ # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+ # block size when downloading a file.
+ if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+ data_len = self._TEST_FILE_SIZE
+
+ if data_len is not None:
+ data_len = int(data_len) + ctx.resume_len
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
+
+ byte_counter = 0 + ctx.resume_len
+ block_size = ctx.block_size
+ start = time.time()
+
+ # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+ now = None # needed for slow_down() in the first loop run
+ before = start # start measuring
+
+ def retry(e):
+ to_stdout = ctx.tmpfilename == '-'
+ if not to_stdout:
+ ctx.stream.close()
+ ctx.stream = None
+ ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+ raise RetryDownload(e)
+
+ while True:
+ try:
+ # Download and write
+ data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+ # socket.timeout is a subclass of socket.error but may not have
+ # errno set
+ except socket.timeout as e:
+ retry(e)
+ except socket.error as e:
+ if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+ raise
+ retry(e)
+
+ byte_counter += len(data_block)
+
+ # exit loop when download is finished
+ if len(data_block) == 0:
+ break
+
+ # Open destination file just in time
+ if ctx.stream is None:
+ try:
+ ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.tmpfilename, ctx.open_mode)
+ assert ctx.stream is not None
+ ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+ self.report_destination(ctx.filename)
+ except (OSError, IOError) as err:
+ self.report_error('unable to open for writing: %s' % str(err))
+ return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
+
+ try:
+ ctx.stream.write(data_block)
+ except (IOError, OSError) as err:
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
+ return False
+
+ # Apply rate limit
+ self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+ # end measuring of one loop run
+ now = time.time()
+ after = now
+
+ # Adjust block size
+ if not self.params.get('noresizebuffer', False):
+ block_size = self.best_block_size(after - before, len(data_block))
+
+ before = after
+
+ # Progress message
+ speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+ if ctx.data_len is None:
+ eta = None
+ else:
+ eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': ctx.data_len,
+ 'tmpfilename': ctx.tmpfilename,
+ 'filename': ctx.filename,
+ 'eta': eta,
+ 'speed': speed,
+ 'elapsed': now - ctx.start_time,
+ })
+
+ if is_test and byte_counter == data_len:
+ break
+
+ if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+ ctx.resume_len = byte_counter
+ # ctx.block_size = block_size
+ raise NextFragment()
+
+ if ctx.stream is None:
+ self.to_stderr('\n')
+ self.report_error('Did not get any data blocks')
+ return False
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
+
+ if data_len is not None and byte_counter != data_len:
+ err = ContentTooShortError(byte_counter, int(data_len))
+ if count <= retries:
+ retry(err)
+ raise err
+
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - ctx.start_time,
+ })
+
+ return True
+
+ while count <= retries:
+ try:
+ establish_connection()
+ return download()
+ except RetryDownload as e:
+ count += 1
+ if count <= retries:
+ self.report_retry(e.source_error, count, retries)
+ continue
+ except NextFragment:
+ continue
+ except SucceedDownload:
+ return True
+
+ self.report_error('giving up after %s retries' % retries)
+ return False
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
new file mode 100644
index 0000000..063fcf4
--- /dev/null
+++ b/youtube_dl/downloader/ism.py
@@ -0,0 +1,259 @@
+from __future__ import unicode_literals
+
+import time
+import binascii
+import io
+
+from .fragment import FragmentFD
+from ..compat import (
+ compat_Struct,
+ compat_urllib_error,
+)
+
+
+u8 = compat_Struct('>B')
+u88 = compat_Struct('>Bx')
+u16 = compat_Struct('>H')
+u1616 = compat_Struct('>Hxx')
+u32 = compat_Struct('>I')
+u64 = compat_Struct('>Q')
+
+s88 = compat_Struct('>bx')
+s16 = compat_Struct('>h')
+s1616 = compat_Struct('>hxx')
+s32 = compat_Struct('>i')
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
+
+TRACK_ENABLED = 0x1
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1
+
+
+def box(box_type, payload):
+ return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):
+ return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
+
+
+def write_piff_header(stream, params):
+ track_id = params['track_id']
+ fourcc = params['fourcc']
+ duration = params['duration']
+ timescale = params.get('timescale', 10000000)
+ language = params.get('language', 'und')
+ height = params.get('height', 0)
+ width = params.get('width', 0)
+ is_audio = width == 0 and height == 0
+ creation_time = modification_time = int(time.time())
+
+ ftyp_payload = b'isml' # major brand
+ ftyp_payload += u32.pack(1) # minor version
+ ftyp_payload += b'piff' + b'iso2' # compatible brands
+ stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
+
+ mvhd_payload = u64.pack(creation_time)
+ mvhd_payload += u64.pack(modification_time)
+ mvhd_payload += u32.pack(timescale)
+ mvhd_payload += u64.pack(duration)
+ mvhd_payload += s1616.pack(1) # rate
+ mvhd_payload += s88.pack(1) # volume
+ mvhd_payload += u16.pack(0) # reserved
+ mvhd_payload += u32.pack(0) * 2 # reserved
+ mvhd_payload += unity_matrix
+ mvhd_payload += u32.pack(0) * 6 # pre defined
+ mvhd_payload += u32.pack(0xffffffff) # next track id
+ moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
+
+ tkhd_payload = u64.pack(creation_time)
+ tkhd_payload += u64.pack(modification_time)
+ tkhd_payload += u32.pack(track_id) # track id
+ tkhd_payload += u32.pack(0) # reserved
+ tkhd_payload += u64.pack(duration)
+ tkhd_payload += u32.pack(0) * 2 # reserved
+ tkhd_payload += s16.pack(0) # layer
+ tkhd_payload += s16.pack(0) # alternate group
+ tkhd_payload += s88.pack(1 if is_audio else 0) # volume
+ tkhd_payload += u16.pack(0) # reserved
+ tkhd_payload += unity_matrix
+ tkhd_payload += u1616.pack(width)
+ tkhd_payload += u1616.pack(height)
+ trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
+
+ mdhd_payload = u64.pack(creation_time)
+ mdhd_payload += u64.pack(modification_time)
+ mdhd_payload += u32.pack(timescale)
+ mdhd_payload += u64.pack(duration)
+ mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+ mdhd_payload += u16.pack(0) # pre defined
+ mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
+
+ hdlr_payload = u32.pack(0) # pre defined
+ hdlr_payload += b'soun' if is_audio else b'vide' # handler type
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
+ mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
+
+ if is_audio:
+ smhd_payload = s88.pack(0) # balance
+ smhd_payload += u16.pack(0) # reserved
+ media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
+ else:
+ vmhd_payload = u16.pack(0) # graphics mode
+ vmhd_payload += u16.pack(0) * 3 # opcolor
+ media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ minf_payload = media_header_box
+
+ dref_payload = u32.pack(1) # entry count
+ dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
+ dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
+ minf_payload += box(b'dinf', dinf_payload) # Data Information Box
+
+ stsd_payload = u32.pack(1) # entry count
+
+ sample_entry_payload = u8.pack(0) * 6 # reserved
+ sample_entry_payload += u16.pack(1) # data reference index
+ if is_audio:
+ sample_entry_payload += u32.pack(0) * 2 # reserved
+ sample_entry_payload += u16.pack(params.get('channels', 2))
+ sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u1616.pack(params['sampling_rate'])
+
+ if fourcc == 'AACL':
+ sample_entry_box = box(b'mp4a', sample_entry_payload)
+ else:
+ sample_entry_payload += u16.pack(0) # pre defined
+ sample_entry_payload += u16.pack(0) # reserved
+ sample_entry_payload += u32.pack(0) * 3 # pre defined
+ sample_entry_payload += u16.pack(width)
+ sample_entry_payload += u16.pack(height)
+ sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
+ sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
+ sample_entry_payload += u32.pack(0) # reserved
+ sample_entry_payload += u16.pack(1) # frame count
+ sample_entry_payload += u8.pack(0) * 32 # compressor name
+ sample_entry_payload += u16.pack(0x18) # depth
+ sample_entry_payload += s16.pack(-1) # pre defined
+
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
+ if fourcc in ('H264', 'AVC1'):
+ sps, pps = codec_private_data.split(u32.pack(1))[1:]
+ avcc_payload = u8.pack(1) # configuration version
+ avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
+ avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
+ avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
+ avcc_payload += u16.pack(len(sps))
+ avcc_payload += sps
+ avcc_payload += u8.pack(1) # number of pps
+ avcc_payload += u16.pack(len(pps))
+ avcc_payload += pps
+ sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
+ sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ stsd_payload += sample_entry_box
+
+ stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
+
+ stts_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
+
+ stsc_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
+
+ stco_payload = u32.pack(0) # entry count
+ stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
+
+ minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
+
+ mdia_payload += box(b'minf', minf_payload) # Media Information Box
+
+ trak_payload += box(b'mdia', mdia_payload) # Media Box
+
+ moov_payload += box(b'trak', trak_payload) # Track Box
+
+ mehd_payload = u64.pack(duration)
+ mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
+
+ trex_payload = u32.pack(track_id) # track id
+ trex_payload += u32.pack(1) # default sample description index
+ trex_payload += u32.pack(0) # default sample duration
+ trex_payload += u32.pack(0) # default sample size
+ trex_payload += u32.pack(0) # default sample flags
+ mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
+
+ moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
+ stream.write(box(b'moov', moov_payload)) # Movie Box
+
+
+def extract_box_data(data, box_sequence):
+ data_reader = io.BytesIO(data)
+ while True:
+ box_size = u32.unpack(data_reader.read(4))[0]
+ box_type = data_reader.read(4)
+ if box_type == box_sequence[0]:
+ box_data = data_reader.read(box_size - 8)
+ if len(box_sequence) == 1:
+ return box_data
+ return extract_box_data(box_data, box_sequence[1:])
+ data_reader.seek(box_size - 8, 1)
+
+
+class IsmFD(FragmentFD):
+ """
+ Download segments in a ISM manifest
+ """
+
+ FD_NAME = 'ism'
+
+ def real_download(self, filename, info_dict):
+ segments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(segments),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ track_written = False
+ frag_index = 0
+ for i, segment in enumerate(segments):
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+ if not success:
+ return False
+ if not track_written:
+ tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
+ info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+ write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+ track_written = True
+ self._append_fragment(ctx, frag_content)
+ break
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
new file mode 100644
index 0000000..fbb7f51
--- /dev/null
+++ b/youtube_dl/downloader/rtmp.py
@@ -0,0 +1,214 @@
+from __future__ import unicode_literals
+
+import os
+import re
+import subprocess
+import time
+
+from .common import FileDownloader
+from ..compat import compat_str
+from ..utils import (
+ check_executable,
+ encodeFilename,
+ encodeArgument,
+ get_exe_version,
+)
+
+
+def rtmpdump_version():
+ return get_exe_version(
+ 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
+
+
+class RtmpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ def run_rtmpdump(args):
+ start = time.time()
+ resume_percent = None
+ resume_downloaded_data_len = None
+ proc = subprocess.Popen(args, stderr=subprocess.PIPE)
+ cursor_in_new_line = True
+ proc_stderr_closed = False
+ try:
+ while not proc_stderr_closed:
+ # read line from stderr
+ line = ''
+ while True:
+ char = proc.stderr.read(1)
+ if not char:
+ proc_stderr_closed = True
+ break
+ if char in [b'\r', b'\n']:
+ break
+ line += char.decode('ascii', 'replace')
+ if not line:
+ # proc_stderr_closed is True
+ continue
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ percent = float(mobj.group(2))
+ if not resume_percent:
+ resume_percent = percent
+ resume_downloaded_data_len = downloaded_data_len
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
+ data_len = None
+ if percent > 0:
+ data_len = int(downloaded_data_len * 100 / percent)
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': downloaded_data_len,
+ 'total_bytes_estimate': data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'eta': eta,
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ })
+ cursor_in_new_line = False
+ else:
+ # no percent for live streams
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ time_now = time.time()
+ speed = self.calc_speed(start, time_now, downloaded_data_len)
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ })
+ cursor_in_new_line = False
+ elif self.params.get('verbose', False):
+ if not cursor_in_new_line:
+ self.to_screen('')
+ cursor_in_new_line = True
+ self.to_screen('[rtmpdump] ' + line)
+ finally:
+ proc.wait()
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.returncode
+
+ url = info_dict['url']
+ player_url = info_dict.get('player_url')
+ page_url = info_dict.get('page_url')
+ app = info_dict.get('app')
+ play_path = info_dict.get('play_path')
+ tc_url = info_dict.get('tc_url')
+ flash_version = info_dict.get('flash_version')
+ live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
+ protocol = info_dict.get('rtmp_protocol')
+ real_time = info_dict.get('rtmp_real_time', False)
+ no_resume = info_dict.get('no_resume', False)
+ continue_dl = self.params.get('continuedl', True)
+
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ test = self.params.get('test', False)
+
+ # Check for rtmpdump first
+ if not check_executable('rtmpdump', ['-h']):
+ self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
+ return False
+
+ # Download using rtmpdump. rtmpdump returns exit code 2 when
+ # the connection was interrupted and resuming appears to be
+ # possible. This is part of rtmpdump's normal usage, AFAIK.
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', tmpfilename]
+ if player_url is not None:
+ basic_args += ['--swfVfy', player_url]
+ if page_url is not None:
+ basic_args += ['--pageUrl', page_url]
+ if app is not None:
+ basic_args += ['--app', app]
+ if play_path is not None:
+ basic_args += ['--playpath', play_path]
+ if tc_url is not None:
+ basic_args += ['--tcUrl', tc_url]
+ if test:
+ basic_args += ['--stop', '1']
+ if flash_version is not None:
+ basic_args += ['--flashVer', flash_version]
+ if live:
+ basic_args += ['--live']
+ if isinstance(conn, list):
+ for entry in conn:
+ basic_args += ['--conn', entry]
+ elif isinstance(conn, compat_str):
+ basic_args += ['--conn', conn]
+ if protocol is not None:
+ basic_args += ['--protocol', protocol]
+ if real_time:
+ basic_args += ['--realtime']
+
+ args = basic_args
+ if not no_resume and continue_dl and not live:
+ args += ['--resume']
+ if not live and continue_dl:
+ args += ['--skip', '1']
+
+ args = [encodeArgument(a) for a in args]
+
+ self._debug_cmd(args, exe='rtmpdump')
+
+ RD_SUCCESS = 0
+ RD_FAILED = 1
+ RD_INCOMPLETE = 2
+ RD_NO_CONNECT = 3
+
+ started = time.time()
+
+ try:
+ retval = run_rtmpdump(args)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ retval = RD_SUCCESS
+ self.to_screen('\n[rtmpdump] Interrupted by user')
+
+ if retval == RD_NO_CONNECT:
+ self.report_error('[rtmpdump] Could not connect to RTMP server.')
+ return False
+
+ while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
+ prevsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
+ time.sleep(5.0) # This seems to be needed
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
+ cursize = os.path.getsize(encodeFilename(tmpfilename))
+ if prevsize == cursize and retval == RD_FAILED:
+ break
+ # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
+ if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
+ self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+ retval = RD_SUCCESS
+ break
+ if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - started,
+ })
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('rtmpdump exited with code %d' % retval)
+ return False
diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py
new file mode 100644
index 0000000..939358b
--- /dev/null
+++ b/youtube_dl/downloader/rtsp.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import os
+import subprocess
+
+from .common import FileDownloader
+from ..utils import (
+ check_executable,
+ encodeFilename,
+)
+
+
+class RtspFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
+ return False
+
+ self._debug_cmd(args)
+
+ retval = subprocess.call(args)
+ if retval == 0:
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
+ self.try_rename(tmpfilename, filename)
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+ return True
+ else:
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (args[0], retval))
+ return False
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
new file mode 100644
index 0000000..d5a4418
--- /dev/null
+++ b/youtube_dl/extractor/__init__.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+try:
+ from .lazy_extractors import *
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
+except ImportError:
+ _LAZY_LOADER = False
+ from .extractors import *
+
+ _ALL_CLASSES = [
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ #_ALL_CLASSES.append(GenericIE)
+
+
+def gen_extractor_classes():
+ """ Return a list of supported extractors.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ return _ALL_CLASSES
+
+
+def gen_extractors():
+ """ Return a list of an instance of every supported extractor.
+ The order does matter; the first extractor matched is the one handling the URL.
+ """
+ return [klass() for klass in gen_extractor_classes()]
+
+
+def list_extractors(age_limit):
+ """
+ Return a list of extractors that are suitable for the given age,
+ sorted by extractor ID.
+ """
+
+ return sorted(
+ filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+ key=lambda ie: ie.IE_NAME.lower())
+
+
+def get_info_extractor(ie_name):
+ """Returns the info extractor class with the given ie_name"""
+ return globals()[ie_name + 'IE']
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
new file mode 100644
index 0000000..b83b51e
--- /dev/null
+++ b/youtube_dl/extractor/adobepass.py
@@ -0,0 +1,1567 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+import xml.etree.ElementTree as etree
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_urlparse,
+)
+from ..utils import (
+ unescapeHTML,
+ urlencode_postdata,
+ unified_timestamp,
+ ExtractorError,
+ NO_DEFAULT,
+)
+
+
+MSO_INFO = {
+ 'DTV': {
+ 'name': 'DIRECTV',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
+ 'ATTOTT': {
+ 'name': 'DIRECTV NOW',
+ 'username_field': 'email',
+ 'password_field': 'loginpassword',
+ },
+ 'Rogers': {
+ 'name': 'Rogers',
+ 'username_field': 'UserName',
+ 'password_field': 'UserPassword',
+ },
+ 'Comcast_SSO': {
+ 'name': 'Comcast XFINITY',
+ 'username_field': 'user',
+ 'password_field': 'passwd',
+ },
+ 'TWC': {
+ 'name': 'Time Warner Cable | Spectrum',
+ 'username_field': 'Ecom_User_ID',
+ 'password_field': 'Ecom_Password',
+ },
+ 'Brighthouse': {
+ 'name': 'Bright House Networks | Spectrum',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
+ 'Charter_Direct': {
+ 'name': 'Charter Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Verizon': {
+ 'name': 'Verizon FiOS',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'thr030': {
+ 'name': '3 Rivers Communications'
+ },
+ 'com140': {
+ 'name': 'Access Montana'
+ },
+ 'acecommunications': {
+ 'name': 'AcenTek'
+ },
+ 'acm010': {
+ 'name': 'Acme Communications'
+ },
+ 'ada020': {
+ 'name': 'Adams Cable Service'
+ },
+ 'alb020': {
+ 'name': 'Albany Mutual Telephone'
+ },
+ 'algona': {
+ 'name': 'Algona Municipal Utilities'
+ },
+ 'allwest': {
+ 'name': 'All West Communications'
+ },
+ 'all025': {
+ 'name': 'Allen\'s Communications'
+ },
+ 'spl010': {
+ 'name': 'Alliance Communications'
+ },
+ 'all070': {
+ 'name': 'ALLO Communications'
+ },
+ 'alpine': {
+ 'name': 'Alpine Communications'
+ },
+ 'hun015': {
+ 'name': 'American Broadband'
+ },
+ 'nwc010': {
+ 'name': 'American Broadband Missouri'
+ },
+ 'com130-02': {
+ 'name': 'American Community Networks'
+ },
+ 'com130-01': {
+ 'name': 'American Warrior Networks'
+ },
+ 'tom020': {
+ 'name': 'Amherst Telephone/Tomorrow Valley'
+ },
+ 'tvc020': {
+ 'name': 'Andycable'
+ },
+ 'arkwest': {
+ 'name': 'Arkwest Communications'
+ },
+ 'art030': {
+ 'name': 'Arthur Mutual Telephone Company'
+ },
+ 'arvig': {
+ 'name': 'Arvig'
+ },
+ 'nttcash010': {
+ 'name': 'Ashland Home Net'
+ },
+ 'astound': {
+ 'name': 'Astound (now Wave)'
+ },
+ 'dix030': {
+ 'name': 'ATC Broadband'
+ },
+ 'ara010': {
+ 'name': 'ATC Communications'
+ },
+ 'she030-02': {
+ 'name': 'Ayersville Communications'
+ },
+ 'baldwin': {
+ 'name': 'Baldwin Lightstream'
+ },
+ 'bal040': {
+ 'name': 'Ballard TV'
+ },
+ 'cit025': {
+ 'name': 'Bardstown Cable TV'
+ },
+ 'bay030': {
+ 'name': 'Bay Country Communications'
+ },
+ 'tel095': {
+ 'name': 'Beaver Creek Cooperative Telephone'
+ },
+ 'bea020': {
+ 'name': 'Beaver Valley Cable'
+ },
+ 'bee010': {
+ 'name': 'Bee Line Cable'
+ },
+ 'wir030': {
+ 'name': 'Beehive Broadband'
+ },
+ 'bra020': {
+ 'name': 'BELD'
+ },
+ 'bel020': {
+ 'name': 'Bellevue Municipal Cable'
+ },
+ 'vol040-01': {
+ 'name': 'Ben Lomand Connect / BLTV'
+ },
+ 'bev010': {
+ 'name': 'BEVCOMM'
+ },
+ 'big020': {
+ 'name': 'Big Sandy Broadband'
+ },
+ 'ble020': {
+ 'name': 'Bledsoe Telephone Cooperative'
+ },
+ 'bvt010': {
+ 'name': 'Blue Valley Tele-Communications'
+ },
+ 'bra050': {
+ 'name': 'Brandenburg Telephone Co.'
+ },
+ 'bte010': {
+ 'name': 'Bristol Tennessee Essential Services'
+ },
+ 'annearundel': {
+ 'name': 'Broadstripe'
+ },
+ 'btc010': {
+ 'name': 'BTC Communications'
+ },
+ 'btc040': {
+ 'name': 'BTC Vision - Nahunta'
+ },
+ 'bul010': {
+ 'name': 'Bulloch Telephone Cooperative'
+ },
+ 'but010': {
+ 'name': 'Butler-Bremer Communications'
+ },
+ 'tel160-csp': {
+ 'name': 'C Spire SNAP'
+ },
+ 'csicable': {
+ 'name': 'Cable Services Inc.'
+ },
+ 'cableamerica': {
+ 'name': 'CableAmerica'
+ },
+ 'cab038': {
+ 'name': 'CableSouth Media 3'
+ },
+ 'weh010-camtel': {
+ 'name': 'Cam-Tel Company'
+ },
+ 'car030': {
+ 'name': 'Cameron Communications'
+ },
+ 'canbytel': {
+ 'name': 'Canby Telcom'
+ },
+ 'crt020': {
+ 'name': 'CapRock Tv'
+ },
+ 'car050': {
+ 'name': 'Carnegie Cable'
+ },
+ 'cas': {
+ 'name': 'CAS Cable'
+ },
+ 'casscomm': {
+ 'name': 'CASSCOMM'
+ },
+ 'mid180-02': {
+ 'name': 'Catalina Broadband Solutions'
+ },
+ 'cccomm': {
+ 'name': 'CC Communications'
+ },
+ 'nttccde010': {
+ 'name': 'CDE Lightband'
+ },
+ 'cfunet': {
+ 'name': 'Cedar Falls Utilities'
+ },
+ 'dem010-01': {
+ 'name': 'Celect-Bloomer Telephone Area'
+ },
+ 'dem010-02': {
+ 'name': 'Celect-Bruce Telephone Area'
+ },
+ 'dem010-03': {
+ 'name': 'Celect-Citizens Connected Area'
+ },
+ 'dem010-04': {
+ 'name': 'Celect-Elmwood/Spring Valley Area'
+ },
+ 'dem010-06': {
+ 'name': 'Celect-Mosaic Telecom'
+ },
+ 'dem010-05': {
+ 'name': 'Celect-West WI Telephone Area'
+ },
+ 'net010-02': {
+ 'name': 'Cellcom/Nsight Telservices'
+ },
+ 'cen100': {
+ 'name': 'CentraCom'
+ },
+ 'nttccst010': {
+ 'name': 'Central Scott / CSTV'
+ },
+ 'cha035': {
+ 'name': 'Chaparral CableVision'
+ },
+ 'cha050': {
+ 'name': 'Chariton Valley Communication Corporation, Inc.'
+ },
+ 'cha060': {
+ 'name': 'Chatmoss Cablevision'
+ },
+ 'nttcche010': {
+ 'name': 'Cherokee Communications'
+ },
+ 'che050': {
+ 'name': 'Chesapeake Bay Communications'
+ },
+ 'cimtel': {
+ 'name': 'Cim-Tel Cable, LLC.'
+ },
+ 'cit180': {
+ 'name': 'Citizens Cablevision - Floyd, VA'
+ },
+ 'cit210': {
+ 'name': 'Citizens Cablevision, Inc.'
+ },
+ 'cit040': {
+ 'name': 'Citizens Fiber'
+ },
+ 'cit250': {
+ 'name': 'Citizens Mutual'
+ },
+ 'war040': {
+ 'name': 'Citizens Telephone Corporation'
+ },
+ 'wat025': {
+ 'name': 'City Of Monroe'
+ },
+ 'wadsworth': {
+ 'name': 'CityLink'
+ },
+ 'nor100': {
+ 'name': 'CL Tel'
+ },
+ 'cla010': {
+ 'name': 'Clarence Telephone and Cedar Communications'
+ },
+ 'ser060': {
+ 'name': 'Clear Choice Communications'
+ },
+ 'tac020': {
+ 'name': 'Click! Cable TV'
+ },
+ 'war020': {
+ 'name': 'CLICK1.NET'
+ },
+ 'cml010': {
+ 'name': 'CML Telephone Cooperative Association'
+ },
+ 'cns': {
+ 'name': 'CNS'
+ },
+ 'com160': {
+ 'name': 'Co-Mo Connect'
+ },
+ 'coa020': {
+ 'name': 'Coast Communications'
+ },
+ 'coa030': {
+ 'name': 'Coaxial Cable TV'
+ },
+ 'mid055': {
+ 'name': 'Cobalt TV (Mid-State Community TV)'
+ },
+ 'col070': {
+ 'name': 'Columbia Power & Water Systems'
+ },
+ 'col080': {
+ 'name': 'Columbus Telephone'
+ },
+ 'nor105': {
+ 'name': 'Communications 1 Cablevision, Inc.'
+ },
+ 'com150': {
+ 'name': 'Community Cable & Broadband'
+ },
+ 'com020': {
+ 'name': 'Community Communications Company'
+ },
+ 'coy010': {
+ 'name': 'commZoom'
+ },
+ 'com025': {
+ 'name': 'Complete Communication Services'
+ },
+ 'cat020': {
+ 'name': 'Comporium'
+ },
+ 'com071': {
+ 'name': 'ComSouth Telesys'
+ },
+ 'consolidatedcable': {
+ 'name': 'Consolidated'
+ },
+ 'conwaycorp': {
+ 'name': 'Conway Corporation'
+ },
+ 'coo050': {
+ 'name': 'Coon Valley Telecommunications Inc'
+ },
+ 'coo080': {
+ 'name': 'Cooperative Telephone Company'
+ },
+ 'cpt010': {
+ 'name': 'CP-TEL'
+ },
+ 'cra010': {
+ 'name': 'Craw-Kan Telephone'
+ },
+ 'crestview': {
+ 'name': 'Crestview Cable Communications'
+ },
+ 'cross': {
+ 'name': 'Cross TV'
+ },
+ 'cro030': {
+ 'name': 'Crosslake Communications'
+ },
+ 'ctc040': {
+ 'name': 'CTC - Brainerd MN'
+ },
+ 'phe030': {
+ 'name': 'CTV-Beam - East Alabama'
+ },
+ 'cun010': {
+ 'name': 'Cunningham Telephone & Cable'
+ },
+ 'dpc010': {
+ 'name': 'D & P Communications'
+ },
+ 'dak030': {
+ 'name': 'Dakota Central Telecommunications'
+ },
+ 'nttcdel010': {
+ 'name': 'Delcambre Telephone LLC'
+ },
+ 'tel160-del': {
+ 'name': 'Delta Telephone Company'
+ },
+ 'sal040': {
+ 'name': 'DiamondNet'
+ },
+ 'ind060-dc': {
+ 'name': 'Direct Communications'
+ },
+ 'doy010': {
+ 'name': 'Doylestown Cable TV'
+ },
+ 'dic010': {
+ 'name': 'DRN'
+ },
+ 'dtc020': {
+ 'name': 'DTC'
+ },
+ 'dtc010': {
+ 'name': 'DTC Cable (Delhi)'
+ },
+ 'dum010': {
+ 'name': 'Dumont Telephone Company'
+ },
+ 'dun010': {
+ 'name': 'Dunkerton Telephone Cooperative'
+ },
+ 'cci010': {
+ 'name': 'Duo County Telecom'
+ },
+ 'eagle': {
+ 'name': 'Eagle Communications'
+ },
+ 'weh010-east': {
+ 'name': 'East Arkansas Cable TV'
+ },
+ 'eatel': {
+ 'name': 'EATEL Video, LLC'
+ },
+ 'ell010': {
+ 'name': 'ECTA'
+ },
+ 'emerytelcom': {
+ 'name': 'Emery Telcom Video LLC'
+ },
+ 'nor200': {
+ 'name': 'Empire Access'
+ },
+ 'endeavor': {
+ 'name': 'Endeavor Communications'
+ },
+ 'sun045': {
+ 'name': 'Enhanced Telecommunications Corporation'
+ },
+ 'mid030': {
+ 'name': 'enTouch'
+ },
+ 'epb020': {
+ 'name': 'EPB Smartnet'
+ },
+ 'jea010': {
+ 'name': 'EPlus Broadband'
+ },
+ 'com065': {
+ 'name': 'ETC'
+ },
+ 'ete010': {
+ 'name': 'Etex Communications'
+ },
+ 'fbc-tele': {
+ 'name': 'F&B Communications'
+ },
+ 'fal010': {
+ 'name': 'Falcon Broadband'
+ },
+ 'fam010': {
+ 'name': 'FamilyView CableVision'
+ },
+ 'far020': {
+ 'name': 'Farmers Mutual Telephone Company'
+ },
+ 'fay010': {
+ 'name': 'Fayetteville Public Utilities'
+ },
+ 'sal060': {
+ 'name': 'fibrant'
+ },
+ 'fid010': {
+ 'name': 'Fidelity Communications'
+ },
+ 'for030': {
+ 'name': 'FJ Communications'
+ },
+ 'fli020': {
+ 'name': 'Flint River Communications'
+ },
+ 'far030': {
+ 'name': 'FMT - Jesup'
+ },
+ 'foo010': {
+ 'name': 'Foothills Communications'
+ },
+ 'for080': {
+ 'name': 'Forsyth CableNet'
+ },
+ 'fbcomm': {
+ 'name': 'Frankfort Plant Board'
+ },
+ 'tel160-fra': {
+ 'name': 'Franklin Telephone Company'
+ },
+ 'nttcftc010': {
+ 'name': 'FTC'
+ },
+ 'fullchannel': {
+ 'name': 'Full Channel, Inc.'
+ },
+ 'gar040': {
+ 'name': 'Gardonville Cooperative Telephone Association'
+ },
+ 'gbt010': {
+ 'name': 'GBT Communications, Inc.'
+ },
+ 'tec010': {
+ 'name': 'Genuine Telecom'
+ },
+ 'clr010': {
+ 'name': 'Giant Communications'
+ },
+ 'gla010': {
+ 'name': 'Glasgow EPB'
+ },
+ 'gle010': {
+ 'name': 'Glenwood Telecommunications'
+ },
+ 'gra060': {
+ 'name': 'GLW Broadband Inc.'
+ },
+ 'goldenwest': {
+ 'name': 'Golden West Cablevision'
+ },
+ 'vis030': {
+ 'name': 'Grantsburg Telcom'
+ },
+ 'gpcom': {
+ 'name': 'Great Plains Communications'
+ },
+ 'gri010': {
+ 'name': 'Gridley Cable Inc'
+ },
+ 'hbc010': {
+ 'name': 'H&B Cable Services'
+ },
+ 'hae010': {
+ 'name': 'Haefele TV Inc.'
+ },
+ 'htc010': {
+ 'name': 'Halstad Telephone Company'
+ },
+ 'har005': {
+ 'name': 'Harlan Municipal Utilities'
+ },
+ 'har020': {
+ 'name': 'Hart Communications'
+ },
+ 'ced010': {
+ 'name': 'Hartelco TV'
+ },
+ 'hea040': {
+ 'name': 'Heart of Iowa Communications Cooperative'
+ },
+ 'htc020': {
+ 'name': 'Hickory Telephone Company'
+ },
+ 'nttchig010': {
+ 'name': 'Highland Communication Services'
+ },
+ 'hig030': {
+ 'name': 'Highland Media'
+ },
+ 'spc010': {
+ 'name': 'Hilliary Communications'
+ },
+ 'hin020': {
+ 'name': 'Hinton CATV Co.'
+ },
+ 'hometel': {
+ 'name': 'HomeTel Entertainment, Inc.'
+ },
+ 'hoodcanal': {
+ 'name': 'Hood Canal Communications'
+ },
+ 'weh010-hope': {
+ 'name': 'Hope - Prescott Cable TV'
+ },
+ 'horizoncable': {
+ 'name': 'Horizon Cable TV, Inc.'
+ },
+ 'hor040': {
+ 'name': 'Horizon Chillicothe Telephone'
+ },
+ 'htc030': {
+ 'name': 'HTC Communications Co. - IL'
+ },
+ 'htccomm': {
+ 'name': 'HTC Communications, Inc. - IA'
+ },
+ 'wal005': {
+ 'name': 'Huxley Communications'
+ },
+ 'imon': {
+ 'name': 'ImOn Communications'
+ },
+ 'ind040': {
+ 'name': 'Independence Telecommunications'
+ },
+ 'rrc010': {
+ 'name': 'Inland Networks'
+ },
+ 'stc020': {
+ 'name': 'Innovative Cable TV St Croix'
+ },
+ 'car100': {
+ 'name': 'Innovative Cable TV St Thomas-St John'
+ },
+ 'icc010': {
+ 'name': 'Inside Connect Cable'
+ },
+ 'int100': {
+ 'name': 'Integra Telecom'
+ },
+ 'int050': {
+ 'name': 'Interstate Telecommunications Coop'
+ },
+ 'irv010': {
+ 'name': 'Irvine Cable'
+ },
+ 'k2c010': {
+ 'name': 'K2 Communications'
+ },
+ 'kal010': {
+ 'name': 'Kalida Telephone Company, Inc.'
+ },
+ 'kal030': {
+ 'name': 'Kalona Cooperative Telephone Company'
+ },
+ 'kmt010': {
+ 'name': 'KMTelecom'
+ },
+ 'kpu010': {
+ 'name': 'KPU Telecommunications'
+ },
+ 'kuh010': {
+ 'name': 'Kuhn Communications, Inc.'
+ },
+ 'lak130': {
+ 'name': 'Lakeland Communications'
+ },
+ 'lan010': {
+ 'name': 'Langco'
+ },
+ 'lau020': {
+ 'name': 'Laurel Highland Total Communications, Inc.'
+ },
+ 'leh010': {
+ 'name': 'Lehigh Valley Cooperative Telephone'
+ },
+ 'bra010': {
+ 'name': 'Limestone Cable/Bracken Cable'
+ },
+ 'loc020': {
+ 'name': 'LISCO'
+ },
+ 'lit020': {
+ 'name': 'Litestream'
+ },
+ 'tel140': {
+ 'name': 'LivCom'
+ },
+ 'loc010': {
+ 'name': 'LocalTel Communications'
+ },
+ 'weh010-longview': {
+ 'name': 'Longview - Kilgore Cable TV'
+ },
+ 'lon030': {
+ 'name': 'Lonsdale Video Ventures, LLC'
+ },
+ 'lns010': {
+ 'name': 'Lost Nation-Elwood Telephone Co.'
+ },
+ 'nttclpc010': {
+ 'name': 'LPC Connect'
+ },
+ 'lumos': {
+ 'name': 'Lumos Networks'
+ },
+ 'madison': {
+ 'name': 'Madison Communications'
+ },
+ 'mad030': {
+ 'name': 'Madison County Cable Inc.'
+ },
+ 'nttcmah010': {
+ 'name': 'Mahaska Communication Group'
+ },
+ 'mar010': {
+ 'name': 'Marne & Elk Horn Telephone Company'
+ },
+ 'mcc040': {
+ 'name': 'McClure Telephone Co.'
+ },
+ 'mctv': {
+ 'name': 'MCTV'
+ },
+ 'merrimac': {
+ 'name': 'Merrimac Communications Ltd.'
+ },
+ 'metronet': {
+ 'name': 'Metronet'
+ },
+ 'mhtc': {
+ 'name': 'MHTC'
+ },
+ 'midhudson': {
+ 'name': 'Mid-Hudson Cable'
+ },
+ 'midrivers': {
+ 'name': 'Mid-Rivers Communications'
+ },
+ 'mid045': {
+ 'name': 'Midstate Communications'
+ },
+ 'mil080': {
+ 'name': 'Milford Communications'
+ },
+ 'min030': {
+ 'name': 'MINET'
+ },
+ 'nttcmin010': {
+ 'name': 'Minford TV'
+ },
+ 'san040-02': {
+ 'name': 'Mitchell Telecom'
+ },
+ 'mlg010': {
+ 'name': 'MLGC'
+ },
+ 'mon060': {
+ 'name': 'Mon-Cre TVE'
+ },
+ 'mou110': {
+ 'name': 'Mountain Telephone'
+ },
+ 'mou050': {
+ 'name': 'Mountain Village Cable'
+ },
+ 'mtacomm': {
+ 'name': 'MTA Communications, LLC'
+ },
+ 'mtc010': {
+ 'name': 'MTC Cable'
+ },
+ 'med040': {
+ 'name': 'MTC Technologies'
+ },
+ 'man060': {
+ 'name': 'MTCC'
+ },
+ 'mtc030': {
+ 'name': 'MTCO Communications'
+ },
+ 'mul050': {
+ 'name': 'Mulberry Telecommunications'
+ },
+ 'mur010': {
+ 'name': 'Murray Electric System'
+ },
+ 'musfiber': {
+ 'name': 'MUS FiberNET'
+ },
+ 'mpw': {
+ 'name': 'Muscatine Power & Water'
+ },
+ 'nttcsli010': {
+ 'name': 'myEVTV.com'
+ },
+ 'nor115': {
+ 'name': 'NCC'
+ },
+ 'nor260': {
+ 'name': 'NDTC'
+ },
+ 'nctc': {
+ 'name': 'Nebraska Central Telecom, Inc.'
+ },
+ 'nel020': {
+ 'name': 'Nelsonville TV Cable'
+ },
+ 'nem010': {
+ 'name': 'Nemont'
+ },
+ 'new075': {
+ 'name': 'New Hope Telephone Cooperative'
+ },
+ 'nor240': {
+ 'name': 'NICP'
+ },
+ 'cic010': {
+ 'name': 'NineStar Connect'
+ },
+ 'nktelco': {
+ 'name': 'NKTelco'
+ },
+ 'nortex': {
+ 'name': 'Nortex Communications'
+ },
+ 'nor140': {
+ 'name': 'North Central Telephone Cooperative'
+ },
+ 'nor030': {
+ 'name': 'Northland Communications'
+ },
+ 'nor075': {
+ 'name': 'Northwest Communications'
+ },
+ 'nor125': {
+ 'name': 'Norwood Light Broadband'
+ },
+ 'net010': {
+ 'name': 'Nsight Telservices'
+ },
+ 'dur010': {
+ 'name': 'Ntec'
+ },
+ 'nts010': {
+ 'name': 'NTS Communications'
+ },
+ 'new045': {
+ 'name': 'NU-Telecom'
+ },
+ 'nulink': {
+ 'name': 'NuLink'
+ },
+ 'jam030': {
+ 'name': 'NVC'
+ },
+ 'far035': {
+ 'name': 'OmniTel Communications'
+ },
+ 'onesource': {
+ 'name': 'OneSource Communications'
+ },
+ 'cit230': {
+ 'name': 'Opelika Power Services'
+ },
+ 'daltonutilities': {
+ 'name': 'OptiLink'
+ },
+ 'mid140': {
+ 'name': 'OPTURA'
+ },
+ 'ote010': {
+ 'name': 'OTEC Communication Company'
+ },
+ 'cci020': {
+ 'name': 'Packerland Broadband'
+ },
+ 'pan010': {
+ 'name': 'Panora Telco/Guthrie Center Communications'
+ },
+ 'otter': {
+ 'name': 'Park Region Telephone & Otter Tail Telcom'
+ },
+ 'mid050': {
+ 'name': 'Partner Communications Cooperative'
+ },
+ 'fib010': {
+ 'name': 'Pathway'
+ },
+ 'paulbunyan': {
+ 'name': 'Paul Bunyan Communications'
+ },
+ 'pem020': {
+ 'name': 'Pembroke Telephone Company'
+ },
+ 'mck010': {
+ 'name': 'Peoples Rural Telephone Cooperative'
+ },
+ 'pul010': {
+ 'name': 'PES Energize'
+ },
+ 'phi010': {
+ 'name': 'Philippi Communications System'
+ },
+ 'phonoscope': {
+ 'name': 'Phonoscope Cable'
+ },
+ 'pin070': {
+ 'name': 'Pine Belt Communications, Inc.'
+ },
+ 'weh010-pine': {
+ 'name': 'Pine Bluff Cable TV'
+ },
+ 'pin060': {
+ 'name': 'Pineland Telephone Cooperative'
+ },
+ 'cam010': {
+ 'name': 'Pinpoint Communications'
+ },
+ 'pio060': {
+ 'name': 'Pioneer Broadband'
+ },
+ 'pioncomm': {
+ 'name': 'Pioneer Communications'
+ },
+ 'pioneer': {
+ 'name': 'Pioneer DTV'
+ },
+ 'pla020': {
+ 'name': 'Plant TiftNet, Inc.'
+ },
+ 'par010': {
+ 'name': 'PLWC'
+ },
+ 'pro035': {
+ 'name': 'PMT'
+ },
+ 'vik011': {
+ 'name': 'Polar Cablevision'
+ },
+ 'pottawatomie': {
+ 'name': 'Pottawatomie Telephone Co.'
+ },
+ 'premiercomm': {
+ 'name': 'Premier Communications'
+ },
+ 'psc010': {
+ 'name': 'PSC'
+ },
+ 'pan020': {
+ 'name': 'PTCI'
+ },
+ 'qco010': {
+ 'name': 'QCOL'
+ },
+ 'qua010': {
+ 'name': 'Quality Cablevision'
+ },
+ 'rad010': {
+ 'name': 'Radcliffe Telephone Company'
+ },
+ 'car040': {
+ 'name': 'Rainbow Communications'
+ },
+ 'rai030': {
+ 'name': 'Rainier Connect'
+ },
+ 'ral010': {
+ 'name': 'Ralls Technologies'
+ },
+ 'rct010': {
+ 'name': 'RC Technologies'
+ },
+ 'red040': {
+ 'name': 'Red River Communications'
+ },
+ 'ree010': {
+ 'name': 'Reedsburg Utility Commission'
+ },
+ 'mol010': {
+ 'name': 'Reliance Connects- Oregon'
+ },
+ 'res020': {
+ 'name': 'Reserve Telecommunications'
+ },
+ 'weh010-resort': {
+ 'name': 'Resort TV Cable'
+ },
+ 'rld010': {
+ 'name': 'Richland Grant Telephone Cooperative, Inc.'
+ },
+ 'riv030': {
+ 'name': 'River Valley Telecommunications Coop'
+ },
+ 'rockportcable': {
+ 'name': 'Rock Port Cablevision'
+ },
+ 'rsf010': {
+ 'name': 'RS Fiber'
+ },
+ 'rtc': {
+ 'name': 'RTC Communication Corp'
+ },
+ 'res040': {
+ 'name': 'RTC-Reservation Telephone Coop.'
+ },
+ 'rte010': {
+ 'name': 'RTEC Communications'
+ },
+ 'stc010': {
+ 'name': 'S&T'
+ },
+ 'san020': {
+ 'name': 'San Bruno Cable TV'
+ },
+ 'san040-01': {
+ 'name': 'Santel'
+ },
+ 'sav010': {
+ 'name': 'SCI Broadband-Savage Communications Inc.'
+ },
+ 'sco050': {
+ 'name': 'Scottsboro Electric Power Board'
+ },
+ 'scr010': {
+ 'name': 'Scranton Telephone Company'
+ },
+ 'selco': {
+ 'name': 'SELCO'
+ },
+ 'she010': {
+ 'name': 'Shentel'
+ },
+ 'she030': {
+ 'name': 'Sherwood Mutual Telephone Association, Inc.'
+ },
+ 'ind060-ssc': {
+ 'name': 'Silver Star Communications'
+ },
+ 'sjoberg': {
+ 'name': 'Sjoberg\'s Inc.'
+ },
+ 'sou025': {
+ 'name': 'SKT'
+ },
+ 'sky050': {
+ 'name': 'SkyBest TV'
+ },
+ 'nttcsmi010': {
+ 'name': 'Smithville Communications'
+ },
+ 'woo010': {
+ 'name': 'Solarus'
+ },
+ 'sou075': {
+ 'name': 'South Central Rural Telephone Cooperative'
+ },
+ 'sou065': {
+ 'name': 'South Holt Cablevision, Inc.'
+ },
+ 'sou035': {
+ 'name': 'South Slope Cooperative Communications'
+ },
+ 'spa020': {
+ 'name': 'Spanish Fork Community Network'
+ },
+ 'spe010': {
+ 'name': 'Spencer Municipal Utilities'
+ },
+ 'spi005': {
+ 'name': 'Spillway Communications, Inc.'
+ },
+ 'srt010': {
+ 'name': 'SRT'
+ },
+ 'cccsmc010': {
+ 'name': 'St. Maarten Cable TV'
+ },
+ 'sta025': {
+ 'name': 'Star Communications'
+ },
+ 'sco020': {
+ 'name': 'STE'
+ },
+ 'uin010': {
+ 'name': 'STRATA Networks'
+ },
+ 'sum010': {
+ 'name': 'Sumner Cable TV'
+ },
+ 'pie010': {
+ 'name': 'Surry TV/PCSI TV'
+ },
+ 'swa010': {
+ 'name': 'Swayzee Communications'
+ },
+ 'sweetwater': {
+ 'name': 'Sweetwater Cable Television Co'
+ },
+ 'weh010-talequah': {
+ 'name': 'Tahlequah Cable TV'
+ },
+ 'tct': {
+ 'name': 'TCT'
+ },
+ 'tel050': {
+ 'name': 'Tele-Media Company'
+ },
+ 'com050': {
+ 'name': 'The Community Agency'
+ },
+ 'thr020': {
+ 'name': 'Three River'
+ },
+ 'cab140': {
+ 'name': 'Town & Country Technologies'
+ },
+ 'tra010': {
+ 'name': 'Trans-Video'
+ },
+ 'tre010': {
+ 'name': 'Trenton TV Cable Company'
+ },
+ 'tcc': {
+ 'name': 'Tri County Communications Cooperative'
+ },
+ 'tri025': {
+ 'name': 'TriCounty Telecom'
+ },
+ 'tri110': {
+ 'name': 'TrioTel Communications, Inc.'
+ },
+ 'tro010': {
+ 'name': 'Troy Cablevision, Inc.'
+ },
+ 'tsc': {
+ 'name': 'TSC'
+ },
+ 'cit220': {
+ 'name': 'Tullahoma Utilities Board'
+ },
+ 'tvc030': {
+ 'name': 'TV Cable of Rensselaer'
+ },
+ 'tvc015': {
+ 'name': 'TVC Cable'
+ },
+ 'cab180': {
+ 'name': 'TVision'
+ },
+ 'twi040': {
+ 'name': 'Twin Lakes'
+ },
+ 'tvtinc': {
+ 'name': 'Twin Valley'
+ },
+ 'uis010': {
+ 'name': 'Union Telephone Company'
+ },
+ 'uni110': {
+ 'name': 'United Communications - TN'
+ },
+ 'uni120': {
+ 'name': 'United Services'
+ },
+ 'uss020': {
+ 'name': 'US Sonet'
+ },
+ 'cab060': {
+ 'name': 'USA Communications'
+ },
+ 'she005': {
+ 'name': 'USA Communications/Shellsburg, IA'
+ },
+ 'val040': {
+ 'name': 'Valley TeleCom Group'
+ },
+ 'val025': {
+ 'name': 'Valley Telecommunications'
+ },
+ 'val030': {
+ 'name': 'Valparaiso Broadband'
+ },
+ 'cla050': {
+ 'name': 'Vast Broadband'
+ },
+ 'sul015': {
+ 'name': 'Venture Communications Cooperative, Inc.'
+ },
+ 'ver025': {
+ 'name': 'Vernon Communications Co-op'
+ },
+ 'weh010-vicksburg': {
+ 'name': 'Vicksburg Video'
+ },
+ 'vis070': {
+ 'name': 'Vision Communications'
+ },
+ 'volcanotel': {
+ 'name': 'Volcano Vision, Inc.'
+ },
+ 'vol040-02': {
+ 'name': 'VolFirst / BLTV'
+ },
+ 'ver070': {
+ 'name': 'VTel'
+ },
+ 'nttcvtx010': {
+ 'name': 'VTX1'
+ },
+ 'bci010-02': {
+ 'name': 'Vyve Broadband'
+ },
+ 'wab020': {
+ 'name': 'Wabash Mutual Telephone'
+ },
+ 'waitsfield': {
+ 'name': 'Waitsfield Cable'
+ },
+ 'wal010': {
+ 'name': 'Walnut Communications'
+ },
+ 'wavebroadband': {
+ 'name': 'Wave'
+ },
+ 'wav030': {
+ 'name': 'Waverly Communications Utility'
+ },
+ 'wbi010': {
+ 'name': 'WBI'
+ },
+ 'web020': {
+ 'name': 'Webster-Calhoun Cooperative Telephone Association'
+ },
+ 'wes005': {
+ 'name': 'West Alabama TV Cable'
+ },
+ 'carolinata': {
+ 'name': 'West Carolina Communications'
+ },
+ 'wct010': {
+ 'name': 'West Central Telephone Association'
+ },
+ 'wes110': {
+ 'name': 'West River Cooperative Telephone Company'
+ },
+ 'ani030': {
+ 'name': 'WesTel Systems'
+ },
+ 'westianet': {
+ 'name': 'Western Iowa Networks'
+ },
+ 'nttcwhi010': {
+ 'name': 'Whidbey Telecom'
+ },
+ 'weh010-white': {
+ 'name': 'White County Cable TV'
+ },
+ 'wes130': {
+ 'name': 'Wiatel'
+ },
+ 'wik010': {
+ 'name': 'Wiktel'
+ },
+ 'wil070': {
+ 'name': 'Wilkes Communications, Inc./RiverStreet Networks'
+ },
+ 'wil015': {
+ 'name': 'Wilson Communications'
+ },
+ 'win010': {
+ 'name': 'Windomnet/SMBS'
+ },
+ 'win090': {
+ 'name': 'Windstream Cable TV'
+ },
+ 'wcta': {
+ 'name': 'Winnebago Cooperative Telecom Association'
+ },
+ 'wtc010': {
+ 'name': 'WTC'
+ },
+ 'wil040': {
+ 'name': 'WTC Communications, Inc.'
+ },
+ 'wya010': {
+ 'name': 'Wyandotte Cable'
+ },
+ 'hin020-02': {
+ 'name': 'X-Stream Services'
+ },
+ 'xit010': {
+ 'name': 'XIT Communications'
+ },
+ 'yel010': {
+ 'name': 'Yelcot Communications'
+ },
+ 'mid180-01': {
+ 'name': 'yondoo'
+ },
+ 'cou060': {
+ 'name': 'Zito Media'
+ },
+}
+
+
+class AdobePassIE(InfoExtractor):
+ _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
+ _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+ _MVPD_CACHE = 'ap-mvpd'
+
+ _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = kwargs.get('headers', {})
+ headers.update(self.geo_verification_headers())
+ kwargs['headers'] = headers
+ return super(AdobePassIE, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+
+ @staticmethod
+ def _get_mvpd_resource(provider_id, title, guid, rating):
+ channel = etree.Element('channel')
+ channel_title = etree.SubElement(channel, 'title')
+ channel_title.text = provider_id
+ item = etree.SubElement(channel, 'item')
+ resource_title = etree.SubElement(item, 'title')
+ resource_title.text = title
+ resource_guid = etree.SubElement(item, 'guid')
+ resource_guid.text = guid
+ resource_rating = etree.SubElement(item, 'media:rating')
+ resource_rating.attrib = {'scheme': 'urn:v-chip'}
+ resource_rating.text = rating
+ return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
+
+ def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
+ def xml_text(xml_str, tag):
+ return self._search_regex(
+ '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
+
+ def is_expired(token, date_ele):
+ token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
+ return token_expires and token_expires <= int(time.time())
+
+ def post_form(form_page_res, note, data={}):
+ form_page, urlh = form_page_res
+ post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
+ if not re.match(r'https?://', post_url):
+ post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
+ form_data = self._hidden_inputs(form_page)
+ form_data.update(data)
+ return self._download_webpage_handle(
+ post_url, video_id, note, data=urlencode_postdata(form_data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ def raise_mvpd_required():
+ raise ExtractorError(
+ 'This video is only available for users of participating TV providers. '
+ 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
+ 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
+
+ def extract_redirect_url(html, url=None, fatal=False):
+ # TODO: eliminate code duplication with generic extractor and move
+ # redirection code into _download_webpage_handle
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+ redirect_url = self._search_regex(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ html, 'meta refresh redirect',
+ default=NO_DEFAULT if fatal else None, fatal=fatal)
+ if not redirect_url:
+ return None
+ if url:
+ redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+ return redirect_url
+
+ mvpd_headers = {
+ 'ap_42': 'anonymous',
+ 'ap_11': 'Linux i686',
+ 'ap_z': self._USER_AGENT,
+ 'User-Agent': self._USER_AGENT,
+ }
+
+ guid = xml_text(resource, 'guid') if '<' in resource else resource
+ count = 0
+ while count < 2:
+ requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ authn_token = requestor_info.get('authn_token')
+ if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
+ authn_token = None
+ if not authn_token:
+ # TODO add support for other TV Providers
+ mso_id = self._downloader.params.get('ap_mso')
+ if not mso_id:
+ raise_mvpd_required()
+ username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+ if not username or not password:
+ raise_mvpd_required()
+ mso_info = MSO_INFO[mso_id]
+
+ provider_redirect_page_res = self._download_webpage_handle(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+ 'Downloading Provider Redirect Page', query={
+ 'noflash': 'true',
+ 'mso_id': mso_id,
+ 'requestor_id': requestor_id,
+ 'no_iframe': 'false',
+ 'domain_name': 'adobe.com',
+ 'redirect_url': url,
+ })
+
+ if mso_id == 'Comcast_SSO':
+ # Comcast page flow varies by video site and whether you
+ # are on Comcast's network.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ if 'automatically signing you in' in provider_redirect_page:
+ oauth_redirect_url = self._html_search_regex(
+ r'window\.location\s*=\s*[\'"]([^\'"]+)',
+ provider_redirect_page, 'oauth redirect')
+ self._download_webpage(
+ oauth_redirect_url, video_id, 'Confirming auto login')
+ else:
+ if '<form name="signin"' in provider_redirect_page:
+ provider_login_page_res = provider_redirect_page_res
+ elif 'http-equiv="refresh"' in provider_redirect_page:
+ oauth_redirect_url = extract_redirect_url(
+ provider_redirect_page, fatal=True)
+ provider_login_page_res = self._download_webpage_handle(
+ oauth_redirect_url, video_id,
+ self._DOWNLOADING_LOGIN_PAGE)
+ else:
+ provider_login_page_res = post_form(
+ provider_redirect_page_res,
+ self._DOWNLOADING_LOGIN_PAGE)
+
+ mvpd_confirm_page_res = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ mvpd_confirm_page, urlh = mvpd_confirm_page_res
+ if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Verizon':
+ # In general, if you're connecting from a Verizon-assigned IP,
+ # you will not actually pass your credentials.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ if 'Please wait ...' in provider_redirect_page:
+ saml_redirect_url = self._html_search_regex(
+ r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
+ provider_redirect_page,
+ 'SAML Redirect URL', group='url')
+ saml_login_page = self._download_webpage(
+ saml_redirect_url, video_id,
+ 'Downloading SAML Login Page')
+ else:
+ saml_login_page_res = post_form(
+ provider_redirect_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ saml_login_page, urlh = saml_login_page_res
+ if 'Please try again.' in saml_login_page:
+ raise ExtractorError(
+ 'We\'re sorry, but either the User ID or Password entered is not correct.')
+ saml_login_url = self._search_regex(
+ r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
+ saml_login_page, 'SAML Login URL', group='url')
+ saml_response_json = self._download_json(
+ saml_login_url, video_id, 'Downloading SAML Response',
+ headers={'Content-Type': 'text/xml'})
+ self._download_webpage(
+ saml_response_json['targetValue'], video_id,
+ 'Confirming Login', data=urlencode_postdata({
+ 'SAMLResponse': saml_response_json['SAMLResponse'],
+ 'RelayState': saml_response_json['RelayState']
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ else:
+ # Some providers (e.g. DIRECTV NOW) have another meta refresh
+ # based redirect that should be followed.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_redirect_page, url=urlh.geturl())
+ if provider_refresh_redirect_url:
+ provider_redirect_page_res = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Provider Redirect Page (meta refresh)')
+ provider_login_page_res = post_form(
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+ mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
+ mso_info.get('username_field', 'username'): username,
+ mso_info.get('password_field', 'password'): password,
+ })
+ if mso_id != 'Rogers':
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
+
+ session = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+ 'Retrieving Session', data=urlencode_postdata({
+ '_method': 'GET',
+ 'requestor_id': requestor_id,
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in session:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+ requestor_info['authn_token'] = authn_token
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ authz_token = requestor_info.get(guid)
+ if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
+ authz_token = None
+ if not authz_token:
+ authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
+ 'Retrieving Authorization Token', data=urlencode_postdata({
+ 'resource_id': resource,
+ 'requestor_id': requestor_id,
+ 'authentication_token': authn_token,
+ 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
+ 'userMeta': '1',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in authorize:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ if '<error' in authorize:
+ raise ExtractorError(xml_text(authorize, 'details'), expected=True)
+ authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+ requestor_info[guid] = authz_token
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+ mvpd_headers.update({
+ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
+ 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
+ })
+
+ short_authorize = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
+ video_id, 'Retrieving Media Token', data=urlencode_postdata({
+ 'authz_token': authz_token,
+ 'requestor_id': requestor_id,
+ 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
+ 'hashed_guid': 'false',
+ }), headers=mvpd_headers)
+ if '<pendingLogout' in short_authorize:
+ self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ count += 1
+ continue
+ return short_authorize
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
new file mode 100644
index 0000000..5d4db54
--- /dev/null
+++ b/youtube_dl/extractor/common.py
@@ -0,0 +1,2862 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import datetime
+import hashlib
+import json
+import netrc
+import os
+import random
+import re
+import socket
+import sys
+import time
+import math
+
+from ..compat import (
+ compat_cookiejar,
+ compat_cookies,
+ compat_etree_fromstring,
+ compat_getpass,
+ compat_integer_types,
+ compat_http_client,
+ compat_os_name,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlencode,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
+from ..utils import (
+ NO_DEFAULT,
+ age_restricted,
+ base_url,
+ bug_reports_message,
+ clean_html,
+ compiled_regex_type,
+ determine_ext,
+ determine_protocol,
+ error_to_compat_str,
+ ExtractorError,
+ extract_attributes,
+ fix_xml_ampersands,
+ float_or_none,
+ GeoRestrictedError,
+ GeoUtils,
+ int_or_none,
+ js_to_json,
+ JSON_LD_RE,
+ mimetype2ext,
+ orderedSet,
+ parse_codecs,
+ parse_duration,
+ parse_iso8601,
+ parse_m3u8_attributes,
+ RegexNotFoundError,
+ sanitized_Request,
+ sanitize_filename,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ update_Request,
+ update_url_query,
+ urljoin,
+ url_basename,
+ xpath_element,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class InfoExtractor(object):
+ """Information Extractor class.
+
+ Information extractors are the classes that, given a URL, extract
+ information about the video (or videos) the URL refers to. This
+ information includes the real video URL, the video title, author and
+ others. The information is stored in a dictionary which is then
+ passed to the YoutubeDL. The YoutubeDL processes this
+ information possibly downloading the video to the file system, among
+ other possible outcomes.
+
+ The type field determines the type of the result.
+ By far the most common value (and the default if _type is missing) is
+ "video", which indicates a single video.
+
+ For a video, the dictionaries must include the following fields:
+
+ id: Video identifier.
+ title: Video title, unescaped.
+
+ Additionally, it must contain either a formats entry or a url one:
+
+ formats: A list of dictionaries for each format available, ordered
+ from worst to best quality.
+
+ Potential fields:
+ * url Mandatory. The URL of the video file
+ * manifest_url
+ The URL of the manifest file in case of
+ fragmented media (DASH, hls, hds)
+ * ext Will be calculated from URL if missing
+ * format A human-readable description of the format
+ ("mp4 container with h264/opus").
+ Calculated from the format_id, width, height.
+ and format_note fields if missing.
+ * format_id A short description of the format
+ ("mp4_h264_opus" or "19").
+ Technically optional, but strongly recommended.
+ * format_note Additional info about the format
+ ("3D" or "DASH video")
+ * width Width of the video, if known
+ * height Height of the video, if known
+ * resolution Textual description of width and height
+ * tbr Average bitrate of audio and video in KBit/s
+ * abr Average audio bitrate in KBit/s
+ * acodec Name of the audio codec in use
+ * asr Audio sampling rate in Hertz
+ * vbr Average video bitrate in KBit/s
+ * fps Frame rate
+ * vcodec Name of the video codec in use
+ * container Name of the container format
+ * filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
+ * player_url SWF Player URL (used for rtmpdump).
+ * protocol The protocol that will be used for the actual
+ download, lower-case.
+ "http", "https", "rtsp", "rtmp", "rtmpe",
+ "m3u8", "m3u8_native" or "http_dash_segments".
+ * fragment_base_url
+ Base URL for fragments. Each fragment's path
+ value (if present) will be relative to
+ this URL.
+ * fragments A list of fragments of a fragmented media.
+ Each fragment entry must contain either an url
+ or a path. If an url is present it should be
+ considered by a client. Otherwise both path and
+ fragment_base_url must be present. Here is
+ the list of all potential fields:
+ * "url" - fragment's URL
+ * "path" - fragment's path relative to
+ fragment_base_url
+ * "duration" (optional, int or float)
+ * "filesize" (optional, int)
+ * preference Order number of this format. If this field is
+ present and not None, the formats get sorted
+ by this field, regardless of all other values.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
+ * language Language code, e.g. "de" or "en-US".
+ * language_preference Is this in the language mentioned in
+ the URL?
+ 10 if it's what the URL is about,
+ -1 for default (don't know),
+ -10 otherwise, other values reserved for now.
+ * quality Order number of the video quality of this
+ format, irrespective of the file format.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * http_headers A dictionary of additional HTTP headers
+ to add to the request.
+ * stretched_ratio If given and not 1, indicates that the
+ video's pixels are not square.
+ width : height ratio as float.
+ * no_resume The server does not support resuming the
+ (HTTP or RTMP) download. Boolean.
+ * downloader_options A dictionary of downloader options as
+ described in FileDownloader
+
+ url: Final video URL.
+ ext: Video filename extension.
+ format: The video format, defaults to ext (used for --get-format)
+ player_url: SWF Player URL (used for rtmpdump).
+
+ The following fields are optional:
+
+ alt_title: A secondary title of the video.
+ display_id An alternative identifier for the video, not necessarily
+ unique, but available before title. Typically, id is
+ something like "4234987", title "Dancing naked mole rats",
+ and display_id "dancing-naked-mole-rats"
+ thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
+ * "url"
+ * "preference" (optional, int) - quality of the image
+ * "width" (optional, int)
+ * "height" (optional, int)
+ * "resolution" (optional, string "{width}x{height"},
+ deprecated)
+ * "filesize" (optional, int)
+ thumbnail: Full URL to a video thumbnail image.
+ description: Full video description.
+ uploader: Full name of the video uploader.
+ license: License name the video is licensed under.
+ creator: The creator of the video.
+ release_date: The date (YYYYMMDD) when the video was released.
+ timestamp: UNIX timestamp of the moment the video became available.
+ upload_date: Video upload date (YYYYMMDD).
+ If not explicitly set, calculated from timestamp.
+ uploader_id: Nickname or id of the video uploader.
+ uploader_url: Full URL to a personal webpage of the video uploader.
+ location: Physical location where the video was filmed.
+ subtitles: The available subtitles as a dictionary in the format
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
+ * "data": The subtitles file contents
+ * "url": A URL pointing to the subtitles file
+ "ext" will be calculated from URL if missing
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
+ duration: Length of the video in seconds, as an integer or float.
+ view_count: How many users have watched the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ repost_count: Number of reposts of the video
+ average_rating: Average rating give by users, the scale used depends on the webpage
+ comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (all but one of text or html optional):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
+ age_limit: Age restriction for the video, as an integer (years)
+ webpage_url: The URL to the video webpage, if given to youtube-dl it
+ should allow to get the same result again. (It will be set
+ by YoutubeDL if it's missing)
+ categories: A list of categories that the video falls in, for example
+ ["Sports", "Berlin"]
+ tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ is_live: True, False, or None (=unknown). Whether this video is a
+ live stream that goes on instead of a fixed-length video.
+ start_time: Time in seconds where the reproduction should start, as
+ specified in the URL.
+ end_time: Time in seconds where the reproduction should end, as
+ specified in the URL.
+ chapters: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the chapter in seconds
+ * "end_time" - The end time of the chapter in seconds
+ * "title" (optional, string)
+
+ The following fields should only be used when the video belongs to some logical
+ chapter or section:
+
+ chapter: Name or title of the chapter the video belongs to.
+ chapter_number: Number of the chapter the video belongs to, as an integer.
+ chapter_id: Id of the chapter the video belongs to, as a unicode string.
+
+ The following fields should only be used when the video is an episode of some
+ series, programme or podcast:
+
+ series: Title of the series or programme the video episode belongs to.
+ season: Title of the season the video episode belongs to.
+ season_number: Number of the season the video episode belongs to, as an integer.
+ season_id: Id of the season the video episode belongs to, as a unicode string.
+ episode: Title of the video episode. Unlike mandatory video title field,
+ this field should denote the exact title of the video episode
+ without any kind of decoration.
+ episode_number: Number of the video episode within a season, as an integer.
+ episode_id: Id of the video episode, as a unicode string.
+
+ The following fields should only be used when the media is a track or a part of
+ a music album:
+
+ track: Title of the track.
+ track_number: Number of the track within an album or a disc, as an integer.
+ track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
+ as a unicode string.
+ artist: Artist(s) of the track.
+ genre: Genre(s) of the track.
+ album: Title of the album the track belongs to.
+ album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+ album_artist: List of all artists appeared on the album (e.g.
+ "Ash Borer / Fell Voices" or "Various Artists", useful for splits
+ and compilations).
+ disc_number: Number of the disc or other physical medium the track belongs to,
+ as an integer.
+ release_year: Year (YYYY) when the album was released.
+
+ Unless mentioned otherwise, the fields should be Unicode strings.
+
+ Unless mentioned otherwise, None is equivalent to absence of information.
+
+
+ _type "playlist" indicates multiple videos.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
+
+ Additionally, playlists can have "id", "title", "description", "uploader",
+ "uploader_id", "uploader_url" attributes with the same semantics as videos
+ (see above).
+
+
+ _type "multi_video" indicates that there are multiple videos that
+ form a single show, for examples multiple acts of an opera or TV episode.
+ It must have an entries key like a playlist and contain all the keys
+ required for a video at the same time.
+
+
+ _type "url" indicates that the video must be extracted from another
+ location, possibly by a different extractor. Its only required key is:
+ "url" - the next URL to extract.
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
+ known ahead of time.
+
+
+ _type "url_transparent" entities have the same specification as "url", but
+ indicate that the given additional information is more precise than the one
+ associated with the resolved URL.
+ This is useful when a site employs a video service that hosts the video and
+ its technical metadata, but that video service does not embed a useful
+ title, description etc.
+
+
+ Subclasses of this one should re-define the _real_initialize() and
+ _real_extract() methods and define a _VALID_URL regexp.
+ Probably, they should also be added to the list of extractors.
+
+ _GEO_BYPASS attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Though it won't disable explicit geo restriction bypass based on
+ country code provided with geo_bypass_country.
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ geo restriction bypass mechanism right away in order to bypass
+ geo restriction, of course, if the mechanism is not disabled.
+
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES.
+
+ Finally, the _WORKING attribute should be set to False for broken IEs
+ in order to warn the users and skip the tests.
+ """
+
+ _ready = False
+ _downloader = None
+ _x_forwarded_for_ip = None
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
+ _WORKING = True
+
+ def __init__(self, downloader=None):
+ """Constructor. Receives an optional downloader."""
+ self._ready = False
+ self._x_forwarded_for_ip = None
+ self.set_downloader(downloader)
+
+ @classmethod
+ def suitable(cls, url):
+ """Receives a URL and returns True if suitable for this IE."""
+
+ # This does not use has/getattr intentionally - we want to know whether
+ # we have cached the regexp for *this* class, whereas getattr would also
+ # match the superclass
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ return cls._VALID_URL_RE.match(url) is not None
+
+ @classmethod
+ def _match_id(cls, url):
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ m = cls._VALID_URL_RE.match(url)
+ assert m
+ return compat_str(m.group('id'))
+
+ @classmethod
+ def working(cls):
+ """Getter method for _WORKING."""
+ return cls._WORKING
+
+ def initialize(self):
+ """Initializes an instance (authentication, etc)."""
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
+ if not self._ready:
+ self._real_initialize()
+ self._ready = True
+
+ def _initialize_geo_bypass(self, geo_bypass_context):
+ """
+ Initialize geo restriction bypass mechanism.
+
+ This method is used to initialize geo bypass mechanism based on faking
+ X-Forwarded-For HTTP header. A random country from provided country list
+ is selected and a random IP belonging to this country is generated. This
+ IP will be passed as X-Forwarded-For HTTP header in all subsequent
+ HTTP requests.
+
+ This method will be used for initial geo bypass mechanism initialization
+ during the instance initialization with _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
+
+ You may also manually call it from extractor's code if geo bypass
+ information is not available beforehand (e.g. obtained during
+ extraction) or due to some other reason. In this case you should pass
+ this information in geo bypass context passed as first argument. It may
+ contain following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
+
+ """
+ if not self._x_forwarded_for_ip:
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self._downloader.params.get('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+ # Backward compatibility: previously _initialize_geo_bypass
+ # expected a list of countries, some 3rd party code may still use
+ # it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+ # The whole point of geo bypass mechanism is to fake IP
+ # as X-Forwarded-For HTTP header based on some IP block or
+ # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+ # Otherwise use random IP block from geo bypass context but only
+ # if extractor is known as geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s as X-Forwarded-For.'
+ % self._x_forwarded_for_ip)
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self._downloader.params.get('geo_bypass_country', None)
+
+ # Otherwise use random country code from geo bypass context but
+ # only if extractor is known as geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country.upper()))
+
+ def extract(self, url):
+ """Extracts URL information and returns it in list of dicts."""
+ try:
+ for _ in range(2):
+ try:
+ self.initialize()
+ ie_result = self._real_extract(url)
+ if self._x_forwarded_for_ip:
+ ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ return ie_result
+ except GeoRestrictedError as e:
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
+ raise
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+ raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+ raise ExtractorError('An extractor error has occurred.', cause=e)
+
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self._downloader.params.get('geo_bypass_country', None) and
+ self._GEO_BYPASS and
+ self._downloader.params.get('geo_bypass', True) and
+ not self._x_forwarded_for_ip and
+ countries):
+ country_code = random.choice(countries)
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+ return True
+ return False
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this IE."""
+ self._downloader = downloader
+
+ def _real_initialize(self):
+ """Real initialization process. Redefine in subclasses."""
+ pass
+
+ def _real_extract(self, url):
+ """Real extraction process. Redefine in subclasses."""
+ pass
+
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return compat_str(cls.__name__[:-2])
+
+ @property
+ def IE_NAME(self):
+ return compat_str(type(self).__name__[:-2])
+
+ @staticmethod
+ def __can_accept_status_code(err, expected_status):
+ assert isinstance(err, compat_urllib_error.HTTPError)
+ if expected_status is None:
+ return False
+ if isinstance(expected_status, compat_integer_types):
+ return err.code == expected_status
+ elif isinstance(expected_status, (list, tuple)):
+ return err.code in expected_status
+ elif callable(expected_status):
+ return expected_status(err.code) is True
+ else:
+ assert False
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the response handle.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ if note is None:
+ self.report_download_webpage(video_id)
+ elif note is not False:
+ if video_id is None:
+ self.to_screen('%s' % (note,))
+ else:
+ self.to_screen('%s: %s' % (video_id, note))
+
+ # Some sites check X-Forwarded-For HTTP header in order to figure out
+ # the origin of the client behind proxy. This allows bypassing geo
+ # restriction by faking this header's value to IP that belongs to some
+ # geo unrestricted country. We will do so once we encounter any
+ # geo restriction error.
+ if self._x_forwarded_for_ip:
+ if 'X-Forwarded-For' not in headers:
+ headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ url_or_request = update_Request(
+ url_or_request, data=data, headers=headers, query=query)
+ else:
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ if data is not None or headers:
+ url_or_request = sanitized_Request(url_or_request, data, headers)
+ try:
+ return self._downloader.urlopen(url_or_request)
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ if isinstance(err, compat_urllib_error.HTTPError):
+ if self.__can_accept_status_code(err, expected_status):
+ return err.fp
+
+ if errnote is False:
+ return False
+ if errnote is None:
+ errnote = 'Unable to download webpage'
+
+ errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+ if fatal:
+ raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ else:
+ self._downloader.report_warning(errmsg)
+ return False
+
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return a tuple (page content as string, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ # Strip hashes from the URL (#1038)
+ if isinstance(url_or_request, (compat_str, str)):
+ url_or_request = url_or_request.partition('#')[0]
+
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ if urlh is False:
+ assert not fatal
+ return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ return (content, urlh)
+
+ @staticmethod
+ def _guess_encoding_from_content(content_type, webpage_bytes):
+ m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+ if m:
+ encoding = m.group(1)
+ else:
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
+ else:
+ encoding = 'utf-8'
+
+ return encoding
+
+ def __check_blocked(self, content):
+ first_block = content[:512]
+ if ('<title>Access to this site is blocked</title>' in content and
+ 'Websense' in first_block):
+ msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
+ blocked_iframe = self._html_search_regex(
+ r'<iframe src="([^"]+)"', content,
+ 'Websense information URL', default=None)
+ if blocked_iframe:
+ msg += ' Visit %s for more details' % blocked_iframe
+ raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in first_block:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
+ 'blocklist.rkn.gov.ru' in content):
+ raise ExtractorError(
+ 'Access to this webpage has been blocked by decision of the Russian government. '
+ 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
+ expected=True)
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
+ if not encoding:
+ encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
+ if self._downloader.params.get('dump_intermediate_pages', False):
+ self.to_screen('Dumping request to ' + urlh.geturl())
+ dump = base64.b64encode(webpage_bytes).decode('ascii')
+ self._downloader.to_screen(dump)
+ if self._downloader.params.get('write_pages', False):
+ basen = '%s_%s' % (video_id, urlh.geturl())
+ if len(basen) > 240:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:240 - len(h)] + h
+ raw_filename = basen + '.dump'
+ filename = sanitize_filename(raw_filename, restricted=True)
+ self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
+ with open(filename, 'wb') as outf:
+ outf.write(webpage_bytes)
+
+ try:
+ content = webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ content = webpage_bytes.decode('utf-8', 'replace')
+
+ self.__check_blocked(content)
+
+ return content
+
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=5, encoding=None, data=None,
+ headers={}, query={}, expected_status=None):
+ """
+ Return the data of the page as a string.
+
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a compat_urllib_request.Requestobject
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether error should be considered fatal,
+ i.e. whether it should cause ExtractionError to be raised,
+ otherwise a warning will be reported and extraction continued
+ tries -- number of tries
+ timeout -- sleep interval between tries
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows to accept failed HTTP requests (non 2xx
+ status code) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
+ """
+
+ success = False
+ try_count = 0
+ while success is False:
+ try:
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ success = True
+ except compat_http_client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
+ if res is False:
+ return res
+ else:
+ content, _ = res
+ return content
+
+ def _download_xml_handle(
+ self, url_or_request, video_id, note='Downloading XML',
+ errnote='Unable to download XML', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ xml_string, urlh = res
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_xml(
+ self, url_or_request, video_id,
+ note='Downloading XML', errnote='Unable to download XML',
+ transform_source=None, fatal=True, encoding=None,
+ data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the xml as an xml.etree.ElementTree.Element.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_xml_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ xml_string = transform_source(xml_string)
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except compat_xml_parse_error as ve:
+ errmsg = '%s: Failed to parse XML ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def _download_json_handle(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (JSON object, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ json_string, urlh = res
+ return self._parse_json(
+ json_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_json(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return the JSON object as a dict.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ json_string = transform_source(json_string)
+ try:
+ return json.loads(json_string)
+ except ValueError as ve:
+ errmsg = '%s: Failed to parse JSON ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def report_warning(self, msg, video_id=None):
+ idstr = '' if video_id is None else '%s: ' % video_id
+ self._downloader.report_warning(
+ '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+
+ def to_screen(self, msg):
+ """Print msg to screen, prefixing it with '[ie_name]'"""
+ self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen('%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen('%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen('Confirming age')
+
+ def report_login(self):
+ """Report attempt to log in."""
+ self.to_screen('Logging in')
+
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+ raise GeoRestrictedError(msg, countries=countries)
+
+ # Methods for following #608
+ @staticmethod
+ def url_result(url, ie=None, video_id=None, video_title=None):
+ """Returns a URL that points to a page that should be processed"""
+ # TODO: ie should be the class used for getting the info
+ video_info = {'_type': 'url',
+ 'url': url,
+ 'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
+ if video_title is not None:
+ video_info['title'] = video_title
+ return video_info
+
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
+ self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+ for m in matches)
+ return self.playlist_result(
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
+
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
+ """Returns a playlist"""
+ video_info = {'_type': 'playlist',
+ 'entries': entries}
+ if playlist_id:
+ video_info['id'] = playlist_id
+ if playlist_title:
+ video_info['title'] = playlist_title
+ if playlist_description:
+ video_info['description'] = playlist_description
+ return video_info
+
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Perform a regex search on the given string, using a single or a list of
+ patterns returning the first matching group.
+ In case of failure return a default value or raise a WARNING or a
+ RegexNotFoundError, depending on fatal, specifying the field name.
+ """
+ if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+ mobj = re.search(pattern, string, flags)
+ else:
+ for p in pattern:
+ mobj = re.search(p, string, flags)
+ if mobj:
+ break
+
+ if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+ _name = '\033[0;34m%s\033[0m' % name
+ else:
+ _name = name
+
+ if mobj:
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ else:
+ return mobj.group(group)
+ elif default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract %s' % _name)
+ else:
+ self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ return None
+
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Like _search_regex, but strips HTML tags and unescapes entities.
+ """
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
+ if res:
+ return clean_html(res).strip()
+ else:
+ return res
+
+ def _get_netrc_login_info(self, netrc_machine=None):
+ username = None
+ password = None
+ netrc_machine = netrc_machine or self._NETRC_MACHINE
+
+ if self._downloader.params.get('usenetrc', False):
+ try:
+ info = netrc.netrc().authenticators(netrc_machine)
+ if info is not None:
+ username = info[0]
+ password = info[2]
+ else:
+ raise netrc.NetrcParseError(
+ 'No authenticators for %s' % netrc_machine)
+ except (IOError, netrc.NetrcParseError) as err:
+ self._downloader.report_warning(
+ 'parsing .netrc: %s' % error_to_compat_str(err))
+
+ return username, password
+
+ def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
+ """
+ Get the login info as (username, password)
+ First look for the manually specified credentials using username_option
+ and password_option as keys in params dictionary. If no such credentials
+ available look in the netrc file using the netrc_machine or _NETRC_MACHINE
+ value.
+ If there's no info available, return (None, None)
+ """
+ if self._downloader is None:
+ return (None, None)
+
+ downloader_params = self._downloader.params
+
+ # Attempt to use provided username and password or .netrc data
+ if downloader_params.get(username_option) is not None:
+ username = downloader_params[username_option]
+ password = downloader_params[password_option]
+ else:
+ username, password = self._get_netrc_login_info(netrc_machine)
+
+ return username, password
+
+ def _get_tfa_info(self, note='two-factor verification code'):
+ """
+ Get the two-factor authentication info
+ TODO - asking the user will be required for sms/phone verify
+ currently just uses the command line option
+ If there's no info available, return None
+ """
+ if self._downloader is None:
+ return None
+ downloader_params = self._downloader.params
+
+ if downloader_params.get('twofactor') is not None:
+ return downloader_params['twofactor']
+
+ return compat_getpass('Type %s and press [Return]: ' % note)
+
+ # Helper functions for extracting OpenGraph info
+ @staticmethod
+ def _og_regexes(prop):
+ content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+ property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
+ % {'prop': re.escape(prop)})
+ template = r'<meta[^>]+?%s[^>]+?%s'
+ return [
+ template % (property_re, content_re),
+ template % (content_re, property_re),
+ ]
+
+ @staticmethod
+ def _meta_regex(prop):
+ return r'''(?isx)<meta
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
+ def _og_search_property(self, prop, html, name=None, **kargs):
+ if not isinstance(prop, (list, tuple)):
+ prop = [prop]
+ if name is None:
+ name = 'OpenGraph %s' % prop[0]
+ og_regexes = []
+ for p in prop:
+ og_regexes.extend(self._og_regexes(p))
+ escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
+ if escaped is None:
+ return None
+ return unescapeHTML(escaped)
+
+ def _og_search_thumbnail(self, html, **kargs):
+ return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
+
+ def _og_search_description(self, html, **kargs):
+ return self._og_search_property('description', html, fatal=False, **kargs)
+
+ def _og_search_title(self, html, **kargs):
+ return self._og_search_property('title', html, **kargs)
+
+ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
+ return self._html_search_regex(regexes, html, name, **kargs)
+
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ if not isinstance(name, (list, tuple)):
+ name = [name]
+ if display_name is None:
+ display_name = name[0]
+ return self._html_search_regex(
+ [self._meta_regex(n) for n in name],
+ html, display_name, fatal=fatal, group='content', **kwargs)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
+ def _media_rating_search(self, html):
+ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+ rating = self._html_search_meta('rating', html)
+
+ if not rating:
+ return None
+
+ RATING_TABLE = {
+ 'safe for kids': 0,
+ 'general': 8,
+ '14 years': 14,
+ 'mature': 17,
+ 'restricted': 19,
+ }
+ return RATING_TABLE.get(rating.lower())
+
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower())
+
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
+ def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
+ json_ld = self._search_regex(
+ JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
+ default = kwargs.get('default', NO_DEFAULT)
+ if not json_ld:
+ return default if default is not NO_DEFAULT else {}
+ # JSON-LD may be malformed and thus `fatal` should be respected.
+ # At the same time `default` may be passed that assumes `fatal=False`
+ # for _search_regex. Let's simulate the same behavior here as well.
+ fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+
+ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
+ if isinstance(json_ld, compat_str):
+ json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
+ if not json_ld:
+ return {}
+ info = {}
+ if not isinstance(json_ld, (list, tuple, dict)):
+ return info
+ if isinstance(json_ld, dict):
+ json_ld = [json_ld]
+
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not isinstance(is_e, dict):
+ continue
+ if is_e.get('@type') != 'InteractionCounter':
+ continue
+ interaction_type = is_e.get('interactionType')
+ if not isinstance(interaction_type, compat_str):
+ continue
+ interaction_count = int_or_none(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
+ def extract_video_object(e):
+ assert e['@type'] == 'VideoObject'
+ info.update({
+ 'url': e.get('contentUrl'),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ 'filesize': float_or_none(e.get('contentSize')),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ 'view_count': int_or_none(e.get('interactionCount')),
+ })
+ extract_interaction_statistic(e)
+
+ for e in json_ld:
+ if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
+ item_type = e.get('@type')
+ if expected_type is not None and expected_type != item_type:
+ return info
+ if item_type in ('TVEpisode', 'Episode'):
+ info.update({
+ 'episode': unescapeHTML(e.get('name')),
+ 'episode_number': int_or_none(e.get('episodeNumber')),
+ 'description': unescapeHTML(e.get('description')),
+ })
+ part_of_season = e.get('partOfSeason')
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
+ part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif item_type in ('Article', 'NewsArticle'):
+ info.update({
+ 'timestamp': parse_iso8601(e.get('datePublished')),
+ 'title': unescapeHTML(e.get('headline')),
+ 'description': unescapeHTML(e.get('articleBody')),
+ })
+ elif item_type == 'VideoObject':
+ extract_video_object(e)
+ continue
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
+ break
+ return dict((k, v) for k, v in info.items() if v is not None)
+
+ @staticmethod
+ def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
+ hidden_inputs = {}
+ for input in re.findall(r'(?i)(<input[^>]+>)', html):
+ attrs = extract_attributes(input)
+ if not input:
+ continue
+ if attrs.get('type') not in ('hidden', 'submit'):
+ continue
+ name = attrs.get('name') or attrs.get('id')
+ value = attrs.get('value')
+ if name and value is not None:
+ hidden_inputs[name] = value
+ return hidden_inputs
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
+
+ def _sort_formats(self, formats, field_preference=None):
+ if not formats:
+ raise ExtractorError('No video formats found')
+
+ for f in formats:
+ # Automatically determine tbr when missing based on abr and vbr (improves
+ # formats sorting in some cases)
+ if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
+ f['tbr'] = f['abr'] + f['vbr']
+
+ def _formats_key(f):
+ # TODO remove the following workaround
+ from ..utils import determine_ext
+ if not f.get('ext') and 'url' in f:
+ f['ext'] = determine_ext(f['url'])
+
+ if isinstance(field_preference, (list, tuple)):
+ return tuple(
+ f.get(field)
+ if f.get(field) is not None
+ else ('' if field == 'format_id' else -1)
+ for field in field_preference)
+
+ preference = f.get('preference')
+ if preference is None:
+ preference = 0
+ if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
+ preference -= 0.5
+
+ protocol = f.get('protocol') or determine_protocol(f)
+ proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
+
+ if f.get('vcodec') == 'none': # audio only
+ preference -= 50
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
+ else:
+ ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
+ ext_preference = 0
+ try:
+ audio_ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ audio_ext_preference = -1
+ else:
+ if f.get('acodec') == 'none': # video only
+ preference -= 40
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['flv', 'mp4', 'webm']
+ else:
+ ORDER = ['webm', 'flv', 'mp4']
+ try:
+ ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ ext_preference = -1
+ audio_ext_preference = 0
+
+ return (
+ preference,
+ f.get('language_preference') if f.get('language_preference') is not None else -1,
+ f.get('quality') if f.get('quality') is not None else -1,
+ f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('vbr') if f.get('vbr') is not None else -1,
+ f.get('height') if f.get('height') is not None else -1,
+ f.get('width') if f.get('width') is not None else -1,
+ proto_preference,
+ ext_preference,
+ f.get('abr') if f.get('abr') is not None else -1,
+ audio_ext_preference,
+ f.get('fps') if f.get('fps') is not None else -1,
+ f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ f.get('source_preference') if f.get('source_preference') is not None else -1,
+ f.get('format_id') if f.get('format_id') is not None else '',
+ )
+ formats.sort(key=_formats_key)
+
+ def _check_formats(self, formats, video_id):
+ if formats:
+ formats[:] = filter(
+ lambda f: self._is_valid_url(
+ f['url'], video_id,
+ item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+ formats)
+
+ @staticmethod
+ def _remove_duplicate_formats(formats):
+ format_urls = set()
+ unique_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ format_urls.add(f['url'])
+ unique_formats.append(f)
+ formats[:] = unique_formats
+
+ def _is_valid_url(self, url, video_id, item='video', headers={}):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now assume non HTTP(S) URLs always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
+ try:
+ self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
+ return True
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_urllib_error.URLError):
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, item))
+ return False
+ raise
+
+ def http_scheme(self):
+ """ Either "http:" or "https:", depending on the user's preferences """
+ return (
+ 'http:'
+ if self._downloader.params.get('prefer_insecure', False)
+ else 'https:')
+
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None):
+ manifest = self._download_xml(
+ manifest_url, video_id, 'Downloading f4m manifest',
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source,
+ fatal=fatal)
+
+ if manifest is False:
+ return []
+
+ return self._parse_f4m_formats(
+ manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
+
+ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None):
+ # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
+ akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
+ if akamai_pv is not None and ';' in akamai_pv.text:
+ playerVerificationChallenge = akamai_pv.text.split(';')[0]
+ if playerVerificationChallenge.strip() != '':
+ return []
+
+ formats = []
+ manifest_version = '1.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+ if not media_nodes:
+ manifest_version = '2.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+ # Remove unsupported DRM protected media from final formats
+ # rendition (see https://github.com/rg3/youtube-dl/issues/8573).
+ media_nodes = remove_encrypted_media(media_nodes)
+ if not media_nodes:
+ return formats
+
+ manifest_base_url = get_base_url(manifest)
+
+ bootstrap_info = xpath_element(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
+ 'bootstrap info', default=None)
+
+ vcodec = None
+ mime_type = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+ 'base URL', default=None)
+ if mime_type and mime_type.startswith('audio/'):
+ vcodec = 'none'
+
+ for i, media_el in enumerate(media_nodes):
+ tbr = int_or_none(media_el.attrib.get('bitrate'))
+ width = int_or_none(media_el.attrib.get('width'))
+ height = int_or_none(media_el.attrib.get('height'))
+ format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
+ # If <bootstrapInfo> is present, the specified f4m is a
+ # stream-level manifest, and only set-level manifests may refer to
+ # external resources. See section 11.4 and section 4 of F4M spec
+ if bootstrap_info is None:
+ media_url = None
+ # @href is introduced in 2.0, see section 11.6 of F4M spec
+ if manifest_version == '2.0':
+ media_url = media_el.attrib.get('href')
+ if media_url is None:
+ media_url = media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ # If media_url is itself a f4m manifest do the recursive extraction
+ # since bitrates in parent manifest (this one) and media_url manifest
+ # may differ leading to inability to resolve the format by requested
+ # bitrate in f4m downloader
+ ext = determine_ext(manifest_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal)
+ # Sometimes stream-level manifest contains single media entry that
+ # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
+ # At the same time parent's media entry in set-level manifest may
+ # contain it. We will copy it from parent in such cases.
+ if len(f4m_formats) == 1:
+ f = f4m_formats[0]
+ f.update({
+ 'tbr': f.get('tbr') or tbr,
+ 'width': f.get('width') or width,
+ 'height': f.get('height') or height,
+ 'format_id': f.get('format_id') if not tbr else format_id,
+ 'vcodec': vcodec,
+ })
+ formats.extend(f4m_formats)
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', preference=preference,
+ m3u8_id=m3u8_id, fatal=fatal))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': manifest_url,
+ 'manifest_url': manifest_url,
+ 'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ 'preference': preference,
+ })
+ return formats
+
+ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
+ return {
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
+ 'url': m3u8_url,
+ 'ext': ext,
+ 'protocol': 'm3u8',
+ 'preference': preference - 100 if preference else -100,
+ 'resolution': 'multiple',
+ 'format_note': 'Quality selection URL',
+ }
+
+ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, note=None, errnote=None,
+ fatal=True, live=False):
+ res = self._download_webpage_handle(
+ m3u8_url, video_id,
+ note=note or 'Downloading m3u8 information',
+ errnote=errnote or 'Failed to download m3u8 information',
+ fatal=fatal)
+
+ if res is False:
+ return []
+
+ m3u8_doc, urlh = res
+ m3u8_url = urlh.geturl()
+
+ return self._parse_m3u8_formats(
+ m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+ preference=preference, m3u8_id=m3u8_id, live=live)
+
+ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, live=False):
+ if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
+ return []
+
+ if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
+ return []
+
+ formats = []
+
+ format_url = lambda u: (
+ u
+ if re.match(r'^https?://', u)
+ else compat_urlparse.urljoin(m3u8_url, u))
+
+ # References:
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+ # 2. https://github.com/rg3/youtube-dl/issues/12211
+
+ # We should try extracting formats only from master playlists [1, 4.3.4],
+ # i.e. playlists that describe available qualities. On the other hand
+ # media playlists [1, 4.3.3] should be returned as is since they contain
+ # just the media without qualities renditions.
+ # Fortunately, master playlist can be easily distinguished from media
+ # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
+ # master playlist tags MUST NOT appear in a media playist and vice versa.
+ # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
+ # media playlist and MUST NOT appear in master playlist thus we can
+ # clearly detect media playlist with this criterion.
+
+ if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
+ return [{
+ 'url': m3u8_url,
+ 'format_id': m3u8_id,
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }]
+
+ groups = {}
+ last_stream_inf = {}
+
+ def extract_media(x_media_line):
+ media = parse_m3u8_attributes(x_media_line)
+ # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
+ media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+ if not (media_type and group_id and name):
+ return
+ groups.setdefault(group_id, []).append(media)
+ if media_type not in ('VIDEO', 'AUDIO'):
+ return
+ media_url = media.get('URI')
+ if media_url:
+ format_id = []
+ for v in (m3u8_id, group_id, name):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': format_url(media_url),
+ 'manifest_url': m3u8_url,
+ 'language': media.get('LANGUAGE'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ if media_type == 'AUDIO':
+ f['vcodec'] = 'none'
+ formats.append(f)
+
+ def build_stream_name():
+ # Despite specification does not mention NAME attribute for
+ # EXT-X-STREAM-INF tag it still sometimes may be present (see [1]
+ # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
+ # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+ stream_name = last_stream_inf.get('NAME')
+ if stream_name:
+ return stream_name
+ # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+ # from corresponding rendition group
+ stream_group_id = last_stream_inf.get('VIDEO')
+ if not stream_group_id:
+ return
+ stream_group = groups.get(stream_group_id)
+ if not stream_group:
+ return stream_group_id
+ rendition = stream_group[0]
+ return rendition.get('NAME') or stream_group_id
+
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-STREAM-INF:'):
+ last_stream_inf = parse_m3u8_attributes(line)
+ elif line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+ elif line.startswith('#') or not line.strip():
+ continue
+ else:
+ tbr = float_or_none(
+ last_stream_inf.get('AVERAGE-BANDWIDTH') or
+ last_stream_inf.get('BANDWIDTH'), scale=1000)
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ stream_name = build_stream_name()
+ # Bandwidth of live streams may differ over time thus making
+ # format_id unpredictable. So it's better to keep provided
+ # format_id intact.
+ if not live:
+ format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
+ manifest_url = format_url(line.strip())
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ resolution = last_stream_inf.get('RESOLUTION')
+ if resolution:
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
+ # Unified Streaming Platform
+ mobj = re.search(
+ r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
+ if mobj:
+ abr, vbr = mobj.groups()
+ abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+ f.update({
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ codecs = parse_codecs(last_stream_inf.get('CODECS'))
+ f.update(codecs)
+ audio_group_id = last_stream_inf.get('AUDIO')
+ # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+ # references a rendition group MUST have a CODECS attribute.
+ # However, this is not always respected, for example, [2]
+ # contains EXT-X-STREAM-INF tag which references AUDIO
+ # rendition group but does not have CODECS and despite
+ # referencing audio group an audio group, it represents
+ # a complete (with audio and video) format. So, for such cases
+ # we will ignore references to rendition groups and treat them
+ # as complete formats.
+ if audio_group_id and codecs and f.get('vcodec') != 'none':
+ audio_group = groups.get(audio_group_id)
+ if audio_group and audio_group[0].get('URI'):
+ # TODO: update acodec for audio only formats with
+ # the same GROUP-ID
+ f['acodec'] = 'none'
+ formats.append(f)
+ last_stream_inf = {}
+ return formats
+
+ @staticmethod
+ def _xpath_ns(path, namespace=None):
+ if not namespace:
+ return path
+ out = []
+ for c in path.split('/'):
+ if not c or c == '.':
+ out.append(c)
+ else:
+ out.append('{%s}%s' % (namespace, c))
+ return '/'.join(out)
+
+ def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
+
+ if smil is False:
+ assert not fatal
+ return []
+
+ namespace = self._parse_smil_namespace(smil)
+
+ return self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+
+ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal)
+ if smil is False:
+ return {}
+ return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
+
+ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
+ return self._download_xml(
+ smil_url, video_id, 'Downloading SMIL file',
+ 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
+
+ def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
+ namespace = self._parse_smil_namespace(smil)
+
+ formats = self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+ subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
+
+ video_id = os.path.splitext(url_basename(smil_url))[0]
+ title = None
+ description = None
+ upload_date = None
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ name = meta.attrib.get('name')
+ content = meta.attrib.get('content')
+ if not name or not content:
+ continue
+ if not title and name == 'title':
+ title = content
+ elif not description and name in ('description', 'abstract'):
+ description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
+
+ return {
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _parse_smil_namespace(self, smil):
+ return self._search_regex(
+ r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ base = smil_url
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ b = meta.get('base') or meta.get('httpBase')
+ if b:
+ base = b
+ break
+
+ formats = []
+ rtmp_count = 0
+ http_count = 0
+ m3u8_count = 0
+
+ srcs = []
+ media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+ for medium in media:
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.append(src)
+
+ bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
+ filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
+ width = int_or_none(medium.get('width'))
+ height = int_or_none(medium.get('height'))
+ proto = medium.get('proto')
+ ext = medium.get('ext')
+ src_ext = determine_ext(src)
+ streamer = medium.get('streamer') or base
+
+ if proto == 'rtmp' or streamer.startswith('rtmp'):
+ rtmp_count += 1
+ formats.append({
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
+ continue
+
+ src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src_url.strip()
+
+ if proto == 'm3u8' or src_ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ if len(m3u8_formats) == 1:
+ m3u8_count += 1
+ m3u8_formats[0].update({
+ 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ formats.extend(m3u8_formats)
+ continue
+
+ if src_ext == 'f4m':
+ f4m_url = src_url
+ if not f4m_params:
+ f4m_params = {
+ 'hdcore': '3.2.0',
+ 'plugin': 'flowplayer-3.2.0.1',
+ }
+ f4m_url += '&' if '?' in f4m_url else '?'
+ f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+ continue
+
+ if src_url.startswith('http') and self._is_valid_url(src, video_id):
+ http_count += 1
+ formats.append({
+ 'url': src_url,
+ 'ext': ext or src_ext or 'flv',
+ 'format_id': 'http-%d' % (bitrate or http_count),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+ continue
+
+ return formats
+
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ urls = []
+ subtitles = {}
+ for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+ src = textstream.get('src')
+ if not src or src in urls:
+ continue
+ urls.append(src)
+ ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
+ subtitles.setdefault(lang, []).append({
+ 'url': src,
+ 'ext': ext,
+ })
+ return subtitles
+
+ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
+ xspf = self._download_xml(
+ xspf_url, playlist_id, 'Downloading xpsf playlist',
+ 'Unable to download xspf manifest', fatal=fatal)
+ if xspf is False:
+ return []
+ return self._parse_xspf(
+ xspf, playlist_id, xspf_url=xspf_url,
+ xspf_base_url=base_url(xspf_url))
+
+ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ entries = []
+ for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+ formats = []
+ for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+ format_url = urljoin(xspf_base_url, location.text)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'manifest_url': xspf_url,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ })
+ return entries
+
+ def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
+ res = self._download_xml_handle(
+ mpd_url, video_id,
+ note=note or 'Downloading MPD manifest',
+ errnote=errnote or 'Failed to download MPD manifest',
+ fatal=fatal)
+ if res is False:
+ return []
+ mpd_doc, urlh = res
+ mpd_base_url = base_url(urlh.geturl())
+
+ return self._parse_mpd_formats(
+ mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
+ formats_dict=formats_dict, mpd_url=mpd_url)
+
+ def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
+ """
+ Parse formats from MPD manifest.
+ References:
+ 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
+ http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
+ 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+ """
+ if mpd_doc.get('type') == 'dynamic':
+ return []
+
+ namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, namespace)
+
+ def is_drm_protected(element):
+ return element.find(_add_ns('ContentProtection')) is not None
+
+ def extract_multisegment_info(element, ms_parent_info):
+ ms_info = ms_parent_info.copy()
+
+ # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
+ # common attributes and elements. We will only extract relevant
+ # for us.
+ def extract_common(source):
+ segment_timeline = source.find(_add_ns('SegmentTimeline'))
+ if segment_timeline is not None:
+ s_e = segment_timeline.findall(_add_ns('S'))
+ if s_e:
+ ms_info['total_number'] = 0
+ ms_info['s'] = []
+ for s in s_e:
+ r = int(s.get('r', 0))
+ ms_info['total_number'] += 1 + r
+ ms_info['s'].append({
+ 't': int(s.get('t', 0)),
+ # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
+ 'd': int(s.attrib['d']),
+ 'r': r,
+ })
+ start_number = source.get('startNumber')
+ if start_number:
+ ms_info['start_number'] = int(start_number)
+ timescale = source.get('timescale')
+ if timescale:
+ ms_info['timescale'] = int(timescale)
+ segment_duration = source.get('duration')
+ if segment_duration:
+ ms_info['segment_duration'] = float(segment_duration)
+
+ def extract_Initialization(source):
+ initialization = source.find(_add_ns('Initialization'))
+ if initialization is not None:
+ ms_info['initialization_url'] = initialization.attrib['sourceURL']
+
+ segment_list = element.find(_add_ns('SegmentList'))
+ if segment_list is not None:
+ extract_common(segment_list)
+ extract_Initialization(segment_list)
+ segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
+ if segment_urls_e:
+ ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+ else:
+ segment_template = element.find(_add_ns('SegmentTemplate'))
+ if segment_template is not None:
+ extract_common(segment_template)
+ media = segment_template.get('media')
+ if media:
+ ms_info['media'] = media
+ initialization = segment_template.get('initialization')
+ if initialization:
+ ms_info['initialization'] = initialization
+ else:
+ extract_Initialization(segment_template)
+ return ms_info
+
+ mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
+ formats = []
+ for period in mpd_doc.findall(_add_ns('Period')):
+ period_duration = parse_duration(period.get('duration')) or mpd_duration
+ period_ms_info = extract_multisegment_info(period, {
+ 'start_number': 1,
+ 'timescale': 1,
+ })
+ for adaptation_set in period.findall(_add_ns('AdaptationSet')):
+ if is_drm_protected(adaptation_set):
+ continue
+ adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+ for representation in adaptation_set.findall(_add_ns('Representation')):
+ if is_drm_protected(representation):
+ continue
+ representation_attrib = adaptation_set.attrib.copy()
+ representation_attrib.update(representation.attrib)
+ # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
+ mime_type = representation_attrib['mimeType']
+ content_type = mime_type.split('/')[0]
+ if content_type == 'text':
+ # TODO implement WebVTT downloading
+ pass
+ elif content_type in ('video', 'audio'):
+ base_url = ''
+ for element in (representation, adaptation_set, period, mpd_doc):
+ base_url_e = element.find(_add_ns('BaseURL'))
+ if base_url_e is not None:
+ base_url = base_url_e.text + base_url
+ if re.match(r'^https?://', base_url):
+ break
+ if mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
+ mpd_base_url += '/'
+ base_url = mpd_base_url + base_url
+ representation_id = representation_attrib.get('id')
+ lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ f = {
+ 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+ 'url': base_url,
+ 'manifest_url': mpd_url,
+ 'ext': mimetype2ext(mime_type),
+ 'width': int_or_none(representation_attrib.get('width')),
+ 'height': int_or_none(representation_attrib.get('height')),
+ 'tbr': float_or_none(bandwidth, 1000),
+ 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+ 'fps': int_or_none(representation_attrib.get('frameRate')),
+ 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+ 'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
+ }
+ f.update(parse_codecs(representation_attrib.get('codecs')))
+ representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+ def prepare_template(template_name, identifiers):
+ tmpl = representation_ms_info[template_name]
+ # First of, % characters outside $...$ templates
+ # must be escaped by doubling for proper processing
+ # by % operator string formatting used further (see
+ # https://github.com/rg3/youtube-dl/issues/16867).
+ t = ''
+ in_template = False
+ for c in tmpl:
+ t += c
+ if c == '$':
+ in_template = not in_template
+ elif c == '%' and not in_template:
+ t += c
+ # Next, $...$ templates are translated to their
+ # %(...) counterparts to be used with % operator
+ t = t.replace('$RepresentationID$', representation_id)
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+ t.replace('$$', '$')
+ return t
+
+ # @initialization is a regular template like @media one
+ # so it should be handled just the same way (see
+ # https://github.com/rg3/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
+
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
+
+ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+ # can't be used at the same time
+ if '%(Number' in media_template and 's' not in representation_ms_info:
+ segment_duration = None
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+ segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+ representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+ representation_ms_info['fragments'] = [{
+ media_location_key: media_template % {
+ 'Number': segment_number,
+ 'Bandwidth': bandwidth,
+ },
+ 'duration': segment_duration,
+ } for segment_number in range(
+ representation_ms_info['start_number'],
+ representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+ else:
+ # $Number*$ or $Time$ in media template with S list available
+ # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+ # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+ representation_ms_info['fragments'] = []
+ segment_time = 0
+ segment_d = None
+ segment_number = representation_ms_info['start_number']
+
+ def add_segment_url():
+ segment_url = media_template % {
+ 'Time': segment_time,
+ 'Bandwidth': bandwidth,
+ 'Number': segment_number,
+ }
+ representation_ms_info['fragments'].append({
+ media_location_key: segment_url,
+ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+ })
+
+ for num, s in enumerate(representation_ms_info['s']):
+ segment_time = s.get('t') or segment_time
+ segment_d = s['d']
+ add_segment_url()
+ segment_number += 1
+ for r in range(s.get('r', 0)):
+ segment_time += segment_d
+ add_segment_url()
+ segment_number += 1
+ segment_time += segment_d
+ elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+ # No media template
+ # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # or any YouTube dashsegments video
+ fragments = []
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
+ for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
+ fragments.append({
+ location_key(segment_uri): segment_uri,
+ 'duration': duration,
+ })
+ segment_index += 1
+ representation_ms_info['fragments'] = fragments
+ elif 'segment_urls' in representation_ms_info:
+ # Segment URLs with no SegmentTimeline
+ # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # https://github.com/rg3/youtube-dl/pull/14844
+ fragments = []
+ segment_duration = float_or_none(
+ representation_ms_info['segment_duration'],
+ representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+ for segment_url in representation_ms_info['segment_urls']:
+ fragment = {
+ location_key(segment_url): segment_url,
+ }
+ if segment_duration:
+ fragment['duration'] = segment_duration
+ fragments.append(fragment)
+ representation_ms_info['fragments'] = fragments
+ # NB: MPD manifest may contain direct URLs to unfragmented media.
+ # No fragments key is present in this case.
+ if 'fragments' in representation_ms_info:
+ f.update({
+ 'fragment_base_url': base_url,
+ 'fragments': [],
+ 'protocol': 'http_dash_segments',
+ })
+ if 'initialization_url' in representation_ms_info:
+ initialization_url = representation_ms_info['initialization_url']
+ if not f.get('url'):
+ f['url'] = initialization_url
+ f['fragments'].append({location_key(initialization_url): initialization_url})
+ f['fragments'].extend(representation_ms_info['fragments'])
+ # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+ # is not necessarily unique within a Period thus formats with
+ # the same `format_id` are quite possible. There are numerous examples
+ # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+ # https://github.com/rg3/youtube-dl/issues/13919)
+ full_info = formats_dict.get(representation_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ return formats
+
+ def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
+ res = self._download_xml_handle(
+ ism_url, video_id,
+ note=note or 'Downloading ISM manifest',
+ errnote=errnote or 'Failed to download ISM manifest',
+ fatal=fatal)
+ if res is False:
+ return []
+ ism_doc, urlh = res
+
+ return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+
+ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
+ if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+ return []
+
+ duration = int(ism_doc.attrib['Duration'])
+ timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+ formats = []
+ for stream in ism_doc.findall('StreamIndex'):
+ stream_type = stream.get('Type')
+ if stream_type not in ('video', 'audio'):
+ continue
+ url_pattern = stream.attrib['Url']
+ stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+ stream_name = stream.get('Name')
+ for track in stream.findall('QualityLevel'):
+ fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+ # TODO: add support for WVC1 and WMAP
+ if fourcc not in ('H264', 'AVC1', 'AACL'):
+ self.report_warning('%s is not a supported codec' % fourcc)
+ continue
+ tbr = int(track.attrib['Bitrate']) // 1000
+ # [1] does not mention Width and Height attributes. However,
+ # they're often present while MaxWidth and MaxHeight are
+ # missing, so should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
+ sampling_rate = int_or_none(track.get('SamplingRate'))
+
+ track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+ track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+ fragments = []
+ fragment_ctx = {
+ 'time': 0,
+ }
+ stream_fragments = stream.findall('c')
+ for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+ fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+ fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+ fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+ if not fragment_ctx['duration']:
+ try:
+ next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
+ except IndexError:
+ next_fragment_time = duration
+ fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+ for _ in range(fragment_repeat):
+ fragments.append({
+ 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'duration': fragment_ctx['duration'] / stream_timescale,
+ })
+ fragment_ctx['time'] += fragment_ctx['duration']
+
+ format_id = []
+ if ism_id:
+ format_id.append(ism_id)
+ if stream_name:
+ format_id.append(stream_name)
+ format_id.append(compat_str(tbr))
+
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats
+
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
+ def absolute_url(item_url):
+ return urljoin(base_url, item_url)
+
+ def parse_content_type(content_type):
+ if not content_type:
+ return {}
+ ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
+ if ctr:
+ mimetype, codecs = ctr.groups()
+ f = parse_codecs(codecs)
+ f['ext'] = mimetype2ext(mimetype)
+ return f
+ return {}
+
+ def _media_formats(src, cur_media_type, type_info={}):
+ full_url = absolute_url(src)
+ ext = type_info.get('ext') or determine_ext(full_url)
+ if ext == 'm3u8':
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4',
+ entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+ preference=preference, fatal=False)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,
+ }]
+ return is_plain_url, formats
+
+ entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we wll include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
+ media_tags = [(media_tag, media_type, '')
+ for media_tag, media_type
+ in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by a whitespace or '>'.
+ # Allowing more characters may end up in significant slow down (see
+ # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+ # http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+ for media_tag, media_type, media_content in media_tags:
+ media_info = {
+ 'formats': [],
+ 'subtitles': {},
+ }
+ media_attributes = extract_attributes(media_tag)
+ src = media_attributes.get('src')
+ if src:
+ _, formats = _media_formats(src, media_type)
+ media_info['formats'].extend(formats)
+ media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
+ if media_content:
+ for source_tag in re.findall(r'<source[^>]+>', media_content):
+ source_attributes = extract_attributes(source_tag)
+ src = source_attributes.get('src')
+ if not src:
+ continue
+ f = parse_content_type(source_attributes.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
+ if is_plain_url:
+ # res attribute is not standard but seen several times
+ # in the wild
+ f.update({
+ 'height': int_or_none(source_attributes.get('res')),
+ 'format_id': source_attributes.get('label'),
+ })
+ f.update(formats[0])
+ media_info['formats'].append(f)
+ else:
+ media_info['formats'].extend(formats)
+ for track_tag in re.findall(r'<track[^>]+>', media_content):
+ track_attributes = extract_attributes(track_tag)
+ kind = track_attributes.get('kind')
+ if not kind or kind in ('subtitles', 'captions'):
+ src = track_attributes.get('src')
+ if not src:
+ continue
+ lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
+ media_info['subtitles'].setdefault(lang, []).append({
+ 'url': absolute_url(src),
+ })
+ for f in media_info['formats']:
+ f.setdefault('http_headers', {})['Referer'] = base_url
+ if media_info['formats'] or media_info['subtitles']:
+ entries.append(media_info)
+ return entries
+
+ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ formats = []
+ hdcore_sign = 'hdcore=3.7.0'
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
+ if 'hdcore=' not in f4m_url:
+ f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+ f4m_formats = self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False)
+ for entry in f4m_formats:
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.extend(f4m_formats)
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ return formats
+
+ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = compat_urlparse.urlparse(url).query
+ url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+ mobj = re.search(
+ r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+ url_base = mobj.group('url')
+ http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
+ formats = []
+
+ def manifest_url(manifest):
+ m_url = '%s/%s' % (http_base_url, manifest)
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
+ if 'm3u8' not in skip_protocols:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
+ m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+ if 'f4m' not in skip_protocols:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url('manifest.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ if 'dash' not in skip_protocols:
+ formats.extend(self._extract_mpd_formats(
+ manifest_url('manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if re.search(r'(?:/smil:|\.smil)', url_base):
+ if 'smil' not in skip_protocols:
+ rtmp_formats = self._extract_smil_formats(
+ manifest_url('jwplayer.smil'),
+ video_id, fatal=False)
+ for rtmp_format in rtmp_formats:
+ rtsp_format = rtmp_format.copy()
+ rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+ del rtsp_format['play_path']
+ del rtsp_format['ext']
+ rtsp_format.update({
+ 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+ 'protocol': 'rtsp',
+ })
+ formats.extend([rtmp_format, rtsp_format])
+ else:
+ for protocol in ('rtmp', 'rtsp'):
+ if protocol not in skip_protocols:
+ formats.append({
+ 'url': '%s:%s' % (protocol, url_base),
+ 'format_id': protocol,
+ 'protocol': protocol,
+ })
+ return formats
+
+ def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
+ mobj = re.search(
+ r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
+ webpage)
+ if mobj:
+ try:
+ jwplayer_data = self._parse_json(mobj.group('options'),
+ video_id=video_id,
+ transform_source=transform_source)
+ except ExtractorError:
+ pass
+ else:
+ if isinstance(jwplayer_data, dict):
+ return jwplayer_data
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(
+ jwplayer_data, video_id, *args, **kwargs)
+
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ # JWPlayer backward compatibility: flattened playlists
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+ if 'playlist' not in jwplayer_data:
+ jwplayer_data = {'playlist': [jwplayer_data]}
+
+ entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+ for video_data in jwplayer_data['playlist']:
+ # JWPlayer backward compatibility: flattened sources
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+ if 'sources' not in video_data:
+ video_data['sources'] = [video_data]
+
+ this_video_id = video_id or video_data['mediaid']
+
+ formats = self._parse_jwplayer_formats(
+ video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+ mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
+
+ subtitles = {}
+ tracks = video_data.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = track.get('kind')
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
+
+ entry = {
+ 'id': this_video_id,
+ 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+ 'description': video_data.get('description'),
+ 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+ 'subtitles': subtitles,
+ }
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+ if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': formats[0]['url'],
+ })
+ else:
+ self._sort_formats(formats)
+ entry['formats'] = formats
+ entries.append(entry)
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries)
+
+ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ urls = []
+ formats = []
+ for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
+ source_url = self._proto_relative_url(source.get('file'))
+ if not source_url:
+ continue
+ if base_url:
+ source_url = compat_urlparse.urljoin(base_url, source_url)
+ if source_url in urls:
+ continue
+ urls.append(source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id, fatal=False))
+ elif source_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, video_id, mpd_id=mpd_id, fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in (
+ 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ height = int_or_none(source.get('height'))
+ if height is None:
+ # Often no height is provided but there is a label in
+ # format like "1080p", "720p SD", or 1080.
+ height = int_or_none(self._search_regex(
+ r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ 'height', default=None))
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(source.get('bitrate')),
+ 'ext': ext,
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+ rtmp_url_parts = re.split(
+ r'((?:mp4|mp3|flv):)', source_url, 1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ return formats
+
+ def _live_title(self, name):
+ """ Generate the title for a live video """
+ now = datetime.datetime.now()
+ now_str = now.strftime('%Y-%m-%d %H:%M')
+ return name + ' ' + now_str
+
+ def _int(self, v, name, fatal=False, **kwargs):
+ res = int_or_none(v, **kwargs)
+ if 'get_attr' in kwargs:
+ print(getattr(v, kwargs['get_attr']))
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _float(self, v, name, fatal=False, **kwargs):
+ res = float_or_none(v, **kwargs)
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
+ cookie = compat_cookiejar.Cookie(
+ 0, name, value, port, port is not None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
+ self._downloader.cookiejar.set_cookie(cookie)
+
+ def _get_cookies(self, url):
+ """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+ req = sanitized_Request(url)
+ self._downloader.cookiejar.add_cookie_header(req)
+ return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+
+ def get_testcases(self, include_onlymatching=False):
+ t = getattr(self, '_TEST', None)
+ if t:
+ assert not hasattr(self, '_TESTS'), \
+ '%s has _TEST and _TESTS' % type(self).__name__
+ tests = [t]
+ else:
+ tests = getattr(self, '_TESTS', [])
+ for t in tests:
+ if not include_onlymatching and t.get('only_matching', False):
+ continue
+ t['name'] = type(self).__name__[:-len('IE')]
+ yield t
+
+ def is_suitable(self, age_limit):
+ """ Test whether the extractor is generally suitable for the given
+ age limit (i.e. pornographic sites are not, all others usually are) """
+
+ any_restricted = False
+ for tc in self.get_testcases(include_onlymatching=False):
+ if tc.get('playlist', []):
+ tc = tc['playlist'][0]
+ is_restricted = age_restricted(
+ tc.get('info_dict', {}).get('age_limit'), age_limit)
+ if not is_restricted:
+ return True
+ any_restricted = any_restricted or is_restricted
+ return not any_restricted
+
+ def extract_subtitles(self, *args, **kwargs):
+ if (self._downloader.params.get('writesubtitles', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+ """ Merge two subtitle dictionaries, language by language. """
+ ret = dict(subtitle_dict1)
+ for lang in subtitle_dict2:
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ return ret
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self._downloader.params.get('writeautomaticsub', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def mark_watched(self, *args, **kwargs):
+ if (self._downloader.params.get('mark_watched', False) and
+ (self._get_login_info()[0] is not None or
+ self._downloader.params.get('cookiefile') is not None)):
+ self._mark_watched(*args, **kwargs)
+
+ def _mark_watched(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def geo_verification_headers(self):
+ headers = {}
+ geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ if geo_verification_proxy:
+ headers['Ytdl-request-proxy'] = geo_verification_proxy
+ return headers
+
+ def _generic_id(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+ def _generic_title(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
+
+class SearchInfoExtractor(InfoExtractor):
+ """
+ Base class for paged search queries extractors.
+ They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
+ Instances should define _SEARCH_KEY and _MAX_RESULTS.
+ """
+
+ @classmethod
+ def _make_valid_url(cls):
+ return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
+
+ @classmethod
+ def suitable(cls, url):
+ return re.match(cls._make_valid_url(), url) is not None
+
+ def _real_extract(self, query):
+ mobj = re.match(self._make_valid_url(), query)
+ if mobj is None:
+ raise ExtractorError('Invalid search query "%s"' % query)
+
+ prefix = mobj.group('prefix')
+ query = mobj.group('query')
+ if prefix == '':
+ return self._get_n_results(query, 1)
+ elif prefix == 'all':
+ return self._get_n_results(query, self._MAX_RESULTS)
+ else:
+ n = int(prefix)
+ if n <= 0:
+ raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
+ elif n > self._MAX_RESULTS:
+ self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ n = self._MAX_RESULTS
+ return self._get_n_results(query, n)
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @property
+ def SEARCH_KEY(self):
+ return self._SEARCH_KEY
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
new file mode 100644
index 0000000..79f7a9c
--- /dev/null
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import sys
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:url|URL)$
+ '''
+
+ _TESTS = [{
+ 'url': 'url',
+ 'only_matching': True,
+ }, {
+ 'url': 'URL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ msg = (
+ 'You\'ve asked youtube-dl to download the URL "%s". '
+ 'That doesn\'t make any sense. '
+ 'Simply remove the parameter in your command or configuration.'
+ ) % url
+ if not self._downloader.params.get('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
+ raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ # Disable test for python 3.2 since BOM is broken in re in this version
+ # (see https://github.com/rg3/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py
new file mode 100644
index 0000000..d98331a
--- /dev/null
+++ b/youtube_dl/extractor/commonprotocols.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+)
+
+
+class RtmpIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)rtmp[est]?://.+'
+
+ _TESTS = [{
+ 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': [{
+ 'url': url,
+ 'ext': 'flv',
+ 'format_id': compat_urlparse.urlparse(url).scheme,
+ }],
+ }
+
+
+class MmsIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)mms://.+'
+
+ _TEST = {
+ # Direct MMS link
+ 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+ 'info_dict': {
+ 'id': 'MilesReid(0709)',
+ 'ext': 'wmv',
+ 'title': 'MilesReid(0709)',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads, requiring mplayer or mpv
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url,
+ }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
new file mode 100644
index 0000000..ceb72da
--- /dev/null
+++ b/youtube_dl/extractor/extractors.py
@@ -0,0 +1,31 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+)
+
+from .openload import OpenloadIE
+
+from .youtube import (
+ YoutubeIE,
+ YoutubeChannelIE,
+ YoutubeFavouritesIE,
+ YoutubeHistoryIE,
+ YoutubeLiveIE,
+ YoutubePlaylistIE,
+ YoutubePlaylistsIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeShowIE,
+ YoutubeSubscriptionsIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeUserIE,
+ YoutubeWatchLaterIE,
+)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
new file mode 100644
index 0000000..aa04905
--- /dev/null
+++ b/youtube_dl/extractor/generic.py
@@ -0,0 +1,3335 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ HEADRequest,
+ is_html,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
+ orderedSet,
+ sanitized_Request,
+ smuggle_url,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ UnsupportedError,
+ xpath_text,
+)
+from .commonprotocols import RtmpIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nbc import NBCSportsVPlayerIE
+from .ooyala import OoyalaIE
+from .rutv import RUTVIE
+from .tvc import TVCIE
+from .sportbox import SportBoxEmbedIE
+from .smotri import SmotriIE
+from .myvi import MyviIE
+from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
+from .senateisvp import SenateISVPIE
+from .svt import SVTIE
+from .pornhub import PornHubIE
+from .xhamster import XHamsterEmbedIE
+from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
+from .tube8 import Tube8IE
+from .vimeo import VimeoIE
+from .dailymotion import DailymotionIE
+from .dailymail import DailyMailIE
+from .onionstudios import OnionStudiosIE
+from .viewlift import ViewLiftEmbedIE
+from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
+from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
+from .googledrive import GoogleDriveIE
+from .jwplatform import JWPlatformIE
+from .digiteka import DigitekaIE
+from .arkena import ArkenaIE
+from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
+from .theplatform import ThePlatformIE
+from .vessel import VesselIE
+from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
+from .soundcloud import SoundcloudIE
+from .tunein import TuneInBaseIE
+from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .openload import OpenloadIE
+from .videopress import VideoPressIE
+from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
+from .anvato import AnvatoIE
+from .washingtonpost import WashingtonPostIE
+from .wistia import WistiaIE
+from .mediaset import MediasetIE
+from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
+from .channel9 import Channel9IE
+from .vshare import VShareIE
+from .mediasite import MediasiteIE
+from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
+from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
+from .foxnews import FoxNewsIE
+
+
+class GenericIE(InfoExtractor):
+ IE_DESC = 'Generic downloader that works on some sites'
+ _VALID_URL = r'.*'
+ IE_NAME = 'generic'
+ _TESTS = [
+ # Direct link to a video
+ {
+ 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+ 'info_dict': {
+ 'id': 'trailer',
+ 'ext': 'mp4',
+ 'title': 'trailer',
+ 'upload_date': '20100513',
+ }
+ },
+ # Direct link to media delivered compressed (until Accept-Encoding is *)
+ {
+ 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+ 'md5': '128c42e68b13950268b648275386fc74',
+ 'info_dict': {
+ 'id': 'FictionJunction-Parallel_Hearts',
+ 'ext': 'flac',
+ 'title': 'FictionJunction-Parallel_Hearts',
+ 'upload_date': '20140522',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ],
+ 'skip': 'URL invalid',
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented',
+ r'400.*Bad Request',
+ ],
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # RSS feed
+ {
+ 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'info_dict': {
+ 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'title': 'Zero Punctuation',
+ 'description': 're:.*groundbreaking video review series.*'
+ },
+ 'playlist_mincount': 11,
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ },
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
+ # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+ {
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+ 'info_dict': {
+ 'id': 'smil',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
+ 'formats': 'mincount:16',
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'force_generic_extractor': True,
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+ {
+ 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+ 'info_dict': {
+ 'id': 'hds',
+ 'ext': 'flv',
+ 'title': 'hds',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from https://www.restudy.dk/video/play/id/1637
+ {
+ 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+ 'info_dict': {
+ 'id': 'video_1637',
+ 'ext': 'flv',
+ 'title': 'video_1637',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+ {
+ 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+ 'info_dict': {
+ 'id': 'smil-service',
+ 'ext': 'flv',
+ 'title': 'smil-service',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+ {
+ 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+ {
+ 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+ 'info_dict': {
+ 'id': 'mZlp2ctYIUEB',
+ 'ext': 'mp4',
+ 'title': 'Tikibad ontruimd wegens brand',
+ 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # MPD from http://dash-mse-test.appspot.com/media.html
+ {
+ 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+ 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+ 'info_dict': {
+ 'id': 'car-20120827-manifest',
+ 'ext': 'mp4',
+ 'title': 'car-20120827-manifest',
+ 'formats': 'mincount:9',
+ 'upload_date': '20130904',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ },
+ # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+ {
+ 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+ 'info_dict': {
+ 'id': 'content',
+ 'ext': 'mp4',
+ 'title': 'content',
+ 'formats': 'mincount:8',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # m3u8 served with Content-Type: text/plain
+ {
+ 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'ext': 'mp4',
+ 'title': 'index',
+ 'upload_date': '20140720',
+ 'formats': 'mincount:11',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # google redirect
+ {
+ 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+ 'info_dict': {
+ 'id': 'cmQHVoWB5FY',
+ 'ext': 'mp4',
+ 'upload_date': '20130224',
+ 'uploader_id': 'TheVerge',
+ 'description': r're:^Chris Ziegler takes a look at the\.*',
+ 'uploader': 'The Verge',
+ 'title': 'First Firefox OS phones side-by-side',
+ },
+ 'params': {
+ 'skip_download': False,
+ }
+ },
+ {
+ # redirect in Refresh HTTP header
+ 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ 'upload_date': '20150917',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ },
+ 'params': {
+ 'skip_download': False,
+ },
+ },
+ {
+ 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+ 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
+ 'info_dict': {
+ 'id': '13601338388002',
+ 'ext': 'mp4',
+ 'uploader': 'www.hodiho.fr',
+ 'title': 'R\u00e9gis plante sa Jeep',
+ }
+ },
+ # bandcamp page with custom domain
+ {
+ 'add_ie': ['Bandcamp'],
+ 'url': 'http://bronyrock.com/track/the-pony-mash',
+ 'info_dict': {
+ 'id': '3235767654',
+ 'ext': 'mp3',
+ 'title': 'The Pony Mash',
+ 'uploader': 'M_Pallante',
+ },
+ 'skip': 'There is a limit of 200 free downloads / month for the test song',
+ },
+ {
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ 'info_dict': {
+ 'id': '2765128793001',
+ 'ext': 'mp4',
+ 'title': 'Le cours de bourse : l’analyse technique',
+ 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
+ 'uploader': 'BFM BUSINESS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'add_id': ['BrightcoveLegacy'],
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # https://github.com/rg3/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'id': '3101154703001',
+ 'ext': 'mp4',
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'video gone',
+ },
+ {
+ 'url': 'http://www.championat.com/video/football/v/87/87499.html',
+ 'md5': 'fb973ecf6e4a78a67453647444222983',
+ 'info_dict': {
+ 'id': '3414141473001',
+ 'ext': 'mp4',
+ 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+ 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+ 'uploader': 'Championat',
+ },
+ },
+ {
+ # https://github.com/rg3/youtube-dl/issues/3541
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ {
+ # Brightcove video in <iframe>
+ 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+ 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+ 'info_dict': {
+ 'id': '5360463607001',
+ 'ext': 'mp4',
+ 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
+ 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+ 'uploader': 'United Nations',
+ 'uploader_id': '1362235914001',
+ 'timestamp': 1489593889,
+ 'upload_date': '20170315',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ {
+ # Brightcove with alternative playerID key
+ 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+ 'info_dict': {
+ 'id': 'nmeth.2062_SV1',
+ 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2228375078001',
+ 'ext': 'mp4',
+ 'title': 'nmeth.2062-sv1',
+ 'description': 'nmeth.2062-sv1',
+ 'timestamp': 1363357591,
+ 'upload_date': '20130315',
+ 'uploader': 'Nature Publishing Group',
+ 'uploader_id': '1964492299001',
+ },
+ }],
+ },
+ {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ 'skip': 'video rotates...weekly?',
+ },
+ {
+ # Brightcove:new type [2].
+ 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
+ 'md5': '2b35148fcf48da41c9fb4591650784f3',
+ 'info_dict': {
+ 'id': '5348741021001',
+ 'ext': 'mp4',
+ 'upload_date': '20170306',
+ 'uploader_id': '4191638492001',
+ 'timestamp': 1488769918,
+ 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
+
+ },
+ },
+ {
+ # Alternative brightcove <video> attributes
+ 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
+ 'info_dict': {
+ 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
+ },
+ 'playlist': [{
+ 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
+ 'info_dict': {
+ 'id': '5311302538001',
+ 'ext': 'mp4',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
+ 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
+ 'timestamp': 1486321708,
+ 'upload_date': '20170205',
+ 'uploader_id': '800000640001',
+ },
+ 'only_matching': True,
+ }],
+ },
+ {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ # ooyala video
+ {
+ 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
+ 'info_dict': {
+ 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ 'ext': 'mp4',
+ 'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
+ },
+ 'add_ie': ['Ooyala'],
+ },
+ {
+ # ooyala video embedded with http://player.ooyala.com/iframe.js
+ 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+ 'info_dict': {
+ 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+ 'ext': 'mp4',
+ 'title': '"Steve Jobs: Man in the Machine" trailer',
+ 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'movie expired',
+ },
+ # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+ {
+ 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+ 'info_dict': {
+ 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+ 'ext': 'mp4',
+ 'title': 'Steampunk Fest Comes to Honesdale',
+ 'duration': 43.276,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # embed.ly video
+ {
+ 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
+ 'info_dict': {
+ 'id': '9ODmcdjQcHQ',
+ 'ext': 'mp4',
+ 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
+ 'upload_date': '20140225',
+ 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
+ 'uploader': 'Tested',
+ 'uploader_id': 'testedcom',
+ },
+ # No need to test YoutubeIE here
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # funnyordie embed
+ {
+ 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+ 'info_dict': {
+ 'id': '18e820ec3f',
+ 'ext': 'mp4',
+ 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+ 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ },
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
+ },
+ # RUTV embed
+ {
+ 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+ 'info_dict': {
+ 'id': '776940',
+ 'ext': 'mp4',
+ 'title': 'Охотское море стало целиком российским',
+ 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # TVC embed
+ {
+ 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+ 'info_dict': {
+ 'id': '55304',
+ 'ext': 'mp4',
+ 'title': 'Дошкольное воспитание',
+ },
+ },
+ # SportBox embed
+ {
+ 'url': 'http://www.vestifinance.ru/articles/25753',
+ 'info_dict': {
+ 'id': '25753',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '370908',
+ 'title': 'Госзаказ. День 3',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370905',
+ 'title': 'Госзаказ. День 2',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370902',
+ 'title': 'Госзаказ. День 1',
+ 'ext': 'mp4',
+ }
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Myvi.ru embed
+ {
+ 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+ 'info_dict': {
+ 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+ 'ext': 'mp4',
+ 'title': 'Ужастики, русский трейлер (2015)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 153,
+ }
+ },
+ # XHamster embed
+ {
+ 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ 'info_dict': {
+ 'id': 'showthread',
+ 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ },
+ 'playlist_mincount': 7,
+ # This forum does not allow <iframe> syntaxes anymore
+ # Now HTML tags are displayed as-is
+ 'skip': 'No videos on this page',
+ },
+ # Embedded TED video
+ {
+ 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
+ 'info_dict': {
+ 'id': '1969',
+ 'ext': 'mp4',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
+ }
+ },
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
+ # arte embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+ 'md5': '7653032cbb25bf6c80d80f217055fa43',
+ 'info_dict': {
+ 'id': '048195-004_PLUS7-F',
+ 'ext': 'flv',
+ 'title': 'X:enius',
+ 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+ 'upload_date': '20140320',
+ },
+ 'params': {
+ 'skip_download': 'Requires rtmpdump'
+ },
+ 'skip': 'video gone',
+ },
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
+ # Condé Nast embed
+ {
+ 'url': 'http://www.wired.com/2014/04/honda-asimo/',
+ 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+ 'info_dict': {
+ 'id': '53501be369702d3275860000',
+ 'ext': 'mp4',
+ 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+ }
+ },
+ # Dailymotion embed
+ {
+ 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
+ 'md5': '441aeeb82eb72c422c7f14ec533999cd',
+ 'info_dict': {
+ 'id': 'k2mm4bCdJ6CQ2i7c8o2',
+ 'ext': 'mp4',
+ 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+ 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
+ 'uploader': 'Spi0n',
+ 'uploader_id': 'xgditw',
+ 'upload_date': '20140425',
+ 'timestamp': 1398441542,
+ },
+ 'add_ie': ['Dailymotion'],
+ },
+ # DailyMail embed
+ {
+ 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+ 'info_dict': {
+ 'id': '1495629',
+ 'ext': 'mp4',
+ 'title': 'Care worker punches elderly dementia patient in head 11 times',
+ 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+ },
+ 'add_ie': ['DailyMail'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # YouTube embed
+ {
+ 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
+ 'info_dict': {
+ 'id': 'FXRb4ykk4S0',
+ 'ext': 'mp4',
+ 'title': 'The NBL Auction 2014',
+ 'uploader': 'BADMINTON England',
+ 'uploader_id': 'BADMINTONEvents',
+ 'upload_date': '20140603',
+ 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # MTVSercices embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+ 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
+ 'info_dict': {
+ 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
+ 'ext': 'mp4',
+ 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+ 'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
+ },
+ },
+ # YouTube embed via <data-embed-url="">
+ {
+ 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
+ 'info_dict': {
+ 'id': '4vAffPZIT44',
+ 'ext': 'mp4',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
+ 'uploader': 'Gameloft',
+ 'uploader_id': 'gameloft',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '516718101ec834f74318df76259fb3cc',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'webm',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
+ 'uploader': 'Christopher Sykes',
+ 'uploader_id': 'ChristopherJSykes',
+ },
+ 'add_ie': ['Youtube'],
+ },
+ # Camtasia studio
+ {
+ 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+ 'playlist': [{
+ 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+ 'ext': 'flv',
+ 'duration': 2235.90,
+ }
+ }, {
+ 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+ 'ext': 'flv',
+ 'duration': 2235.93,
+ }
+ }],
+ 'info_dict': {
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ }
+ },
+ # Flowplayer
+ {
+ 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
+ 'md5': '9d65602bf31c6e20014319c7d07fba27',
+ 'info_dict': {
+ 'id': '5123ea6d5e5a7',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'uploader': 'www.handjobhub.com',
+ 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
+ }
+ },
+ # Multiple brightcove videos
+ # https://github.com/rg3/youtube-dl/issues/2283
+ {
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ }
+ },
+ # MLB embed
+ {
+ 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+ 'md5': '96f09a37e44da40dd083e12d9a683327',
+ 'info_dict': {
+ 'id': '33322633',
+ 'ext': 'mp4',
+ 'title': 'Ump changes call to ball',
+ 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+ 'duration': 48,
+ 'timestamp': 1401537900,
+ 'upload_date': '20140531',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ # Wistia embed
+ {
+ 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
+ 'info_dict': {
+ 'id': '6e2wtrbdaf',
+ 'ext': 'mov',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'description': 'a Paywall Videos video from Remilon',
+ 'duration': 644.072,
+ 'uploader': 'study.com',
+ 'timestamp': 1459678540,
+ 'upload_date': '20160403',
+ 'filesize': 24687186,
+ },
+ },
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1',
+ 'description': 'a Martin Fowler video from ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ 'timestamp': 1401832161,
+ 'upload_date': '20140603',
+ },
+ },
+ # Wistia standard embed (async)
+ {
+ 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video from getdrip-1',
+ 'duration': 4986.95,
+ 'timestamp': 1463607249,
+ 'upload_date': '20160518',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # Soundcloud embed
+ {
+ 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
+ 'info_dict': {
+ 'id': '174391317',
+ 'ext': 'mp3',
+ 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
+ 'uploader': 'Sophos Security',
+ 'title': 'Chet Chat 171 - Oct 29, 2014',
+ 'upload_date': '20141029',
+ }
+ },
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # Livestream embed
+ {
+ 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+ 'info_dict': {
+ 'id': '67864563',
+ 'ext': 'flv',
+ 'upload_date': '20141112',
+ 'title': 'Rosetta #CometLanding webcast HL 10',
+ }
+ },
+ # Another Livestream embed, without 'new.' in URL
+ {
+ 'url': 'https://www.freespeech.org/',
+ 'info_dict': {
+ 'id': '123537347',
+ 'ext': 'mp4',
+ 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # LazyYT
+ {
+ 'url': 'https://skiplagged.com/',
+ 'info_dict': {
+ 'id': 'skiplagged',
+ 'title': 'Skiplagged: The smart way to find cheap flights',
+ },
+ 'playlist_mincount': 1,
+ 'add_ie': ['Youtube'],
+ },
+ # Cinchcast embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '7141703',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ }
+ },
+ # Cinerama player
+ {
+ 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+ 'info_dict': {
+ 'id': '730m_DandD_1901_512k',
+ 'ext': 'mp4',
+ 'uploader': 'www.abc.net.au',
+ 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+ }
+ },
+ # embedded viddler video
+ {
+ 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'mp4',
+ 'uploader': 'deadspin',
+ 'title': 'WALL-TO-GORTAT',
+ 'timestamp': 1422285291,
+ 'upload_date': '20150126',
+ },
+ 'add_ie': ['Viddler'],
+ },
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/live.php',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
+ },
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # JWPlayer config passed as variable
+ 'url': 'http://www.txxx.com/videos/3326530/ariele/',
+ 'info_dict': {
+ 'id': '3326530_hq',
+ 'ext': 'mp4',
+ 'title': 'ARIELE | Tube Cup',
+ 'uploader': 'www.txxx.com',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # JWPlatform iframe
+ 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+ 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+ 'info_dict': {
+ 'id': 'O0c5JcKT',
+ 'ext': 'mp4',
+ 'upload_date': '20171122',
+ 'timestamp': 1511366290,
+ 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ },
+ {
+ # Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed (different embed code)
+ {
+ 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+ 'info_dict': {
+ 'id': '1_a52wc67y',
+ 'ext': 'flv',
+ 'upload_date': '20150127',
+ 'uploader_id': 'PremierMedia',
+ 'timestamp': int,
+ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+ },
+ },
+ # Kaltura embed with single quotes
+ {
+ 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+ 'info_dict': {
+ 'id': '0_izeg5utt',
+ 'ext': 'mp4',
+ 'title': '35871',
+ 'timestamp': 1355743100,
+ 'upload_date': '20121217',
+ 'uploader_id': 'cplapp@learn360.com',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed
+ 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+ 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+ 'info_dict': {
+ 'id': '0_f2cfbpwy',
+ 'ext': 'mp4',
+ 'title': 'I. M. Pei: A Centennial Celebration',
+ 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+ 'upload_date': '20170403',
+ 'uploader_id': 'batchUser',
+ 'timestamp': 1491232186,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+ 'info_dict': {
+ 'id': '1_9gzouybz',
+ 'ext': 'mp4',
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+ 'info_dict': {
+ 'id': '0_01b42zps',
+ 'ext': 'mp4',
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # ClipYou (EaglePlatform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable.',
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ # Playwire embed
+ {
+ 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 45.115,
+ },
+ },
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+ 'timestamp': 1427237531,
+ 'uploader': 'Crunch Report',
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Crooks and Liars embed
+ {
+ 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ },
+ },
+ # Crooks and Liars external embed
+ {
+ 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+ 'info_dict': {
+ 'id': 'MTE3MjUtMzQ2MzA',
+ 'ext': 'mp4',
+ 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+ 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+ 'timestamp': 1265032391,
+ 'upload_date': '20100201',
+ 'uploader': 'Heather',
+ },
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20140107',
+ 'timestamp': 1389118457,
+ },
+ 'skip': 'Invalid Page URL',
+ },
+ # NBC News embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+ 'md5': '1aa589c675898ae6d37a17913cf68d66',
+ 'info_dict': {
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
+ 'ext': 'mp4',
+ 'title': 'David Letterman: A Preview',
+ 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'https://video.udn.com/news/300346',
+ 'md5': 'fd2060e988c326991037b9aff9df21a6',
+ 'info_dict': {
+ 'id': '300346',
+ 'ext': 'mp4',
+ 'title': '中一中男師變性 全校師生力挺',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ },
+ # Brightcove URL in single quotes
+ {
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
+ },
+ },
+ # OnionStudios embed
+ {
+ 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+ 'info_dict': {
+ 'id': '2855',
+ 'ext': 'mp4',
+ 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'uploader': 'ClickHole',
+ 'uploader_id': 'clickhole',
+ }
+ },
+ # SnagFilms embed
+ {
+ 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ },
+ # AdobeTVVideo embed
+ {
+ 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ },
+ # BrightcoveInPageEmbed embed
+ {
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
+ },
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+ # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
+ {
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'info_dict': {
+ 'id': '4785848093001',
+ 'ext': 'mp4',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ {
+ # Brightcove embed with whitespace around attribute names
+ 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+ 'info_dict': {
+ 'id': '3167554373001',
+ 'ext': 'mp4',
+ 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+ 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+ 'uploader_id': '1079349493',
+ 'upload_date': '20140207',
+ 'timestamp': 1391810548,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Another form of arte.tv embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+ 'md5': '850bfe45417ddf221288c88a0cffe2e2',
+ 'info_dict': {
+ 'id': '030273-562_PLUS7-F',
+ 'ext': 'mp4',
+ 'title': 'ARTE Reportage - Nulle part, en France',
+ 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+ 'upload_date': '20160409',
+ },
+ },
+ # LiveLeak embed
+ {
+ 'url': 'http://www.wykop.pl/link/3088787/',
+ 'md5': '7619da8c820e835bef21a1efa2a0fc71',
+ 'info_dict': {
+ 'id': '874_1459135191',
+ 'ext': 'mp4',
+ 'title': 'Man shows poor quality of new apartment building',
+ 'description': 'The wall is like a sand pile.',
+ 'uploader': 'Lake8737',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Another LiveLeak embed pattern (#13336)
+ {
+ 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+ 'info_dict': {
+ 'id': '2eb_1496309988',
+ 'ext': 'mp4',
+ 'title': 'Thief robs place where everyone was armed',
+ 'description': 'md5:694d73ee79e535953cf2488562288eee',
+ 'uploader': 'brazilwtf',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Duplicated embedded video URLs
+ {
+ 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+ 'info_dict': {
+ 'id': '149298443_480_16c25b74_2',
+ 'ext': 'mp4',
+ 'title': 'vs. Blue Orange Spring Game',
+ 'uploader': 'www.hudl.com',
+ },
+ },
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # Wordpress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ # generic vimeo embed that requires original URL passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ArkenaIE.ie_key()],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [Vbox7IE.ie_key()],
+ },
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TwentyMinutenIE.ie_key()],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [VideoPressIE.ie_key()],
+ },
+ {
+ # Rutube embed
+ 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+ 'info_dict': {
+ 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+ 'ext': 'flv',
+ 'title': 'Магаззино: Казань 2',
+ 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+ 'uploader': 'Магаззино',
+ 'upload_date': '20170228',
+ 'uploader_id': '996642',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [RutubeIE.ie_key()],
+ },
+ {
+ # ThePlatform embedded with whitespaces in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
+ {
+ # Senate ISVP iframe https
+ 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+ 'md5': 'fb8c70b0b515e5037981a2492099aab8',
+ 'info_dict': {
+ 'id': 'govtaff020316',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ },
+ 'add_ie': [SenateISVPIE.ie_key()],
+ },
+ {
+ # Limelight embeds (1 channel embed + 4 media embeds)
+ 'url': 'http://www.sedona.com/FacilitatorTraining2017',
+ 'info_dict': {
+ 'id': 'FacilitatorTraining2017',
+ 'title': 'Facilitator Training 2017',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
+ {
+ 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+ 'info_dict': {
+ 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+ 'title': 'Standoff with Walnut Creek murder suspect ends',
+ 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ # WashingtonPost embed
+ 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
+ 'info_dict': {
+ 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
+ 'ext': 'mp4',
+ 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
+ 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
+ 'timestamp': 1455216756,
+ 'uploader': 'The Washington Post',
+ 'upload_date': '20160211',
+ },
+ 'add_ie': [WashingtonPostIE.ie_key()],
+ },
+ {
+ # Mediaset embed
+ 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+ 'info_dict': {
+ 'id': '720642',
+ 'ext': 'mp4',
+ 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [MediasetIE.ie_key()],
+ },
+ {
+ # JOJ.sk embeds
+ 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'info_dict': {
+ 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'title': 'Slovenskom sa prehnala vlna silných búrok',
+ },
+ 'playlist_mincount': 5,
+ 'add_ie': [JojIE.ie_key()],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
+ {
+ # vzaar embed
+ 'url': 'http://help.vzaar.com/article/165-embedding-video',
+ 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+ 'info_dict': {
+ 'id': '8707641',
+ 'ext': 'mp4',
+ 'title': 'Building A Business Online: Principal Chairs Q & A',
+ },
+ },
+ {
+ # multiple HTML5 videos on one page
+ 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+ 'info_dict': {
+ 'id': 'keyscenarios',
+ 'title': 'Rescue Kit 14 Free Edition - Getting started',
+ },
+ 'playlist_count': 4,
+ },
+ {
+ # vshare embed
+ 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ }
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [SpringboardPlatformIE.ie_key()],
+ },
+ {
+ 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+ 'info_dict': {
+ 'id': 'uPDB5I9wfp8',
+ 'ext': 'webm',
+ 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+ 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+ 'upload_date': '20160219',
+ 'uploader': 'Pocoyo - Português (BR)',
+ 'uploader_id': 'PocoyoBrazil',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': [YapFilesIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # }
+ ]
+
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+
+ def _extract_rss(self, url, video_id, doc):
+ playlist_title = doc.find('./channel/title').text
+ playlist_desc_el = doc.find('./channel/description')
+ playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
+ entries = []
+ for it in doc.findall('./channel/item'):
+ next_url = None
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ next_url = xpath_text(it, 'link', fatal=False)
+
+ if not next_url:
+ continue
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': url,
+ 'title': playlist_title,
+ 'description': playlist_desc,
+ 'entries': entries,
+ }
+
+ def _extract_camtasia(self, url, video_id, webpage):
+ """ Returns None if no camtasia video can be found. """
+
+ camtasia_cfg = self._search_regex(
+ r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+ webpage, 'camtasia configuration file', default=None)
+ if camtasia_cfg is None:
+ return None
+
+ title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+ camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_cfg = self._download_xml(
+ camtasia_url, video_id,
+ note='Downloading camtasia configuration',
+ errnote='Failed to download camtasia configuration')
+ fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+ entries = []
+ for n in fileset_node.getchildren():
+ url_n = n.find('./uri')
+ if url_n is None:
+ continue
+
+ entries.append({
+ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+ 'title': '%s - %s' % (title, n.tag),
+ 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'duration': float_or_none(n.find('./duration').text),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ }
+
+ def _real_extract(self, url):
+ if url.startswith('//'):
+ return {
+ '_type': 'url',
+ 'url': self.http_scheme() + url,
+ }
+
+ parsed_url = compat_urlparse.urlparse(url)
+ if not parsed_url.scheme:
+ default_search = self._downloader.params.get('default_search')
+ if default_search is None:
+ default_search = 'fixup_error'
+
+ if default_search in ('auto', 'auto_warning', 'fixup_error'):
+ if '/' in url:
+ self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+ elif default_search != 'fixup_error':
+ if default_search == 'auto_warning':
+ if re.match(r'^(?:url|URL)$', url):
+ raise ExtractorError(
+ 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
+ expected=True)
+ else:
+ self._downloader.report_warning(
+ 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
+ return self.url_result('ytsearch:' + url)
+
+ if default_search in ('error', 'fixup_error'):
+ raise ExtractorError(
+ '%r is not a valid URL. '
+ 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
+ % (url, url), expected=True)
+ else:
+ if ':' not in default_search:
+ default_search += ':'
+ return self.url_result(default_search + url)
+
+ url, smuggled_data = unsmuggle_url(url)
+ force_videoid = None
+ is_intentional = smuggled_data and smuggled_data.get('to_generic')
+ if smuggled_data and 'force_videoid' in smuggled_data:
+ force_videoid = smuggled_data['force_videoid']
+ video_id = force_videoid
+ else:
+ video_id = self._generic_id(url)
+
+ self.to_screen('%s: Requesting header' % video_id)
+
+ head_req = HEADRequest(url)
+ head_response = self._request_webpage(
+ head_req, video_id,
+ note=False, errnote='Could not send HEAD request to %s' % url,
+ fatal=False)
+
+ if head_response is not False:
+ # Check for redirect
+ new_url = compat_str(head_response.geturl())
+ if url != new_url:
+ self.report_following_redirect(new_url)
+ if force_videoid:
+ new_url = smuggle_url(
+ new_url, {'force_videoid': force_videoid})
+ return self.url_result(new_url)
+
+ full_response = None
+ if head_response is False:
+ request = sanitized_Request(url)
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+ head_response = full_response
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._generic_title(url),
+ 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
+ }
+
+ # Check for direct link to a video
+ content_type = head_response.headers.get('Content-Type', '').lower()
+ m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+ if m:
+ format_id = compat_str(m.group('format_id'))
+ if format_id.endswith('mpegurl'):
+ formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+ elif format_id == 'f4m':
+ formats = self._extract_f4m_formats(url, video_id)
+ else:
+ formats = [{
+ 'format_id': format_id,
+ 'url': url,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }]
+ info_dict['direct'] = True
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ if not self._downloader.params.get('test', False) and not is_intentional:
+ force = self._downloader.params.get('force_generic_extractor', False)
+ self._downloader.report_warning(
+ '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
+
+ if not full_response:
+ request = sanitized_Request(url)
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
+ # making it impossible to download only chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
+ # that will always result in downloading the whole file that is not desirable.
+ # Therefore for extraction pass we have to override Accept-Encoding to any in order
+ # to accept raw bytes and being able to download only a chunk.
+ # It may probably better to solve this by checking Content-Type for application/octet-stream
+ # after HEAD request finishes, but not sure if we can rely on this.
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+
+ first_bytes = full_response.read(512)
+
+ # Is it an M3U playlist?
+ if first_bytes.startswith(b'#EXTM3U'):
+ info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+
+ # Maybe it's a direct link to a video?
+ # Be careful not to download the whole thing!
+ if not is_html(first_bytes):
+ self._downloader.report_warning(
+ 'URL could be a direct video link, returning it as such.')
+ info_dict.update({
+ 'direct': True,
+ 'url': url,
+ })
+ return info_dict
+
+ webpage = self._webpage_read_content(
+ full_response, url, video_id, prefix=first_bytes)
+
+ self.report_extraction(video_id)
+
+ # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
+ try:
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+ smil = self._parse_smil(doc, url, video_id)
+ self._sort_formats(smil['formats'])
+ return smil
+ elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=compat_str(full_response.geturl())),
+ video_id)
+ elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
+ info_dict['formats'] = self._parse_mpd_formats(
+ doc,
+ mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+ mpd_url=url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+ info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ except compat_xml_parse_error:
+ pass
+
+ # Is it a Camtasia project?
+ camtasia_res = self._extract_camtasia(url, video_id, webpage)
+ if camtasia_res is not None:
+ return camtasia_res
+
+ # Sometimes embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # Unescaping the whole page allows to handle those cases in a generic way
+ webpage = compat_urllib_parse_unquote(webpage)
+
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ video_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'(?s)<title>(.*?)</title>', webpage, 'video title',
+ default='video')
+
+ # Try to detect age limit automatically
+ age_limit = self._rta_search(webpage)
+ # And then there are the jokers who advertise that they use RTA,
+ # but actually don't.
+ AGE_LIMIT_MARKERS = [
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+ ]
+ if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
+ age_limit = 18
+
+ # video uploader is domain name
+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+
+ video_description = self._og_search_description(webpage, default=None)
+ video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit,
+ })
+
+ # Look for Brightcove Legacy Studio embeds
+ bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
+ if bc_urls:
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'BrightcoveLegacy'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
+
+ # Look for Brightcove New Studio embeds
+ bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
+ if bc_urls:
+ return self.playlist_from_matches(
+ bc_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'referrer': url}),
+ ie='BrightcoveNew')
+
+ # Look for Nexx embeds
+ nexx_urls = NexxIE._extract_urls(webpage)
+ if nexx_urls:
+ return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
+
+ # Look for Nexx iFrame embeds
+ nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
+ if nexx_embed_urls:
+ return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
+
+ # Look for ThePlatform embeds
+ tp_urls = ThePlatformIE._extract_urls(webpage)
+ if tp_urls:
+ return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
+
+ # Look for Vessel embeds
+ vessel_urls = VesselIE._extract_urls(webpage)
+ if vessel_urls:
+ return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
+
+ # Look for embedded rtl.nl player
+ matches = re.findall(
+ r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
+ webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
+
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
+
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
+
+ # Look for YouTube embeds
+ youtube_urls = YoutubeIE._extract_urls(webpage)
+ if youtube_urls:
+ return self.playlist_from_matches(
+ youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
+
+ matches = DailymotionIE._extract_urls(webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title)
+
+ # Look for embedded Dailymotion playlist player (#3822)
+ m = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
+ if m:
+ playlists = re.findall(
+ r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+ if playlists:
+ return self.playlist_from_matches(
+ playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
+
+ # Look for DailyMail embeds
+ dailymail_urls = DailyMailIE._extract_urls(webpage)
+ if dailymail_urls:
+ return self.playlist_from_matches(
+ dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
+ # Look for embedded Wistia player
+ wistia_url = WistiaIE._extract_url(webpage)
+ if wistia_url:
+ return {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(wistia_url),
+ 'ie_key': WistiaIE.ie_key(),
+ 'uploader': video_uploader,
+ }
+
+ # Look for SVT player
+ svt_url = SVTIE._extract_url(webpage)
+ if svt_url:
+ return self.url_result(svt_url, 'SVT')
+
+ # Look for Bandcamp pages with custom domain
+ mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+ if mobj is not None:
+ burl = unescapeHTML(mobj.group(1))
+ # Don't set the extractor because it can be a track url or an album
+ return self.url_result(burl)
+
+ # Look for embedded Vevo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Viddler player
+ mobj = re.search(
+ r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Ooyala videos
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+ re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+ re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+ re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ return OoyalaIE._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
+
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+ if embeds:
+ return self.playlist_from_matches(
+ embeds, video_id, video_title,
+ getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
+ # Look for MPORA videos
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Mpora')
+
+ # Look for embedded NovaMov-based player
+ mobj = re.search(
+ r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
+ (?P<url>http://(?:(?:embed|www)\.)?
+ (?:novamov\.com|
+ nowvideo\.(?:ch|sx|eu|at|ag|co)|
+ videoweed\.(?:es|com)|
+ movshare\.(?:net|sx|ag)|
+ divxstage\.(?:eu|net|ch|co|at|ag))
+ /embed\.php.+?)\1''', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Facebook player
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ if facebook_urls:
+ return self.playlist_from_matches(facebook_urls, video_id, video_title)
+
+ # Look for embedded VK player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'VK')
+
+ # Look for embedded Odnoklassniki player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Odnoklassniki')
+
+ # Look for embedded ivi player
+ mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ivi')
+
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
+ # Look for embed.ly
+ mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+ mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+ if mobj is not None:
+ return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ return self.playlist_from_matches(
+ matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
+
+ # Look for BBC iPlayer embed
+ matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
+
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TVC player
+ tvc_url = TVCIE._extract_url(webpage)
+ if tvc_url:
+ return self.url_result(tvc_url, 'TVC')
+
+ # Look for embedded SportBox player
+ sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
+ if sportbox_urls:
+ return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
+
+ # Look for embedded XHamster player
+ xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+ if xhamster_urls:
+ return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
+
+ # Look for embedded TNAFlixNetwork player
+ tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
+ if tnaflix_urls:
+ return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
+
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
+
+ # Look for embedded Tube8 player
+ tube8_urls = Tube8IE._extract_urls(webpage)
+ if tube8_urls:
+ return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
+ # Look for embedded Tvigle player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Tvigle')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
+ # Look for embedded Ustream videos
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
+ # Look for embedded arte.tv player
+ mobj = re.search(
+ r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+ # Look for embedded francetv player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded smotri.com player
+ smotri_url = SmotriIE._extract_url(webpage)
+ if smotri_url:
+ return self.url_result(smotri_url, 'Smotri')
+
+ # Look for embedded Myvi.ru player
+ myvi_url = MyviIE._extract_url(webpage)
+ if myvi_url:
+ return self.url_result(myvi_url)
+
+ # Look for embedded soundcloud player
+ soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ if soundcloud_urls:
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+
+ # Look for tunein player
+ tunein_urls = TuneInBaseIE._extract_urls(webpage)
+ if tunein_urls:
+ return self.playlist_from_matches(tunein_urls, video_id, video_title)
+
+ # Look for embedded mtvservices player
+ mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ if mtvservices_url:
+ return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
+
+ # Look for embedded yahoo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Yahoo')
+
+ # Look for embedded sbs.com.au player
+ mobj = re.search(
+ r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'SBS')
+
+ # Look for embedded Cinchcast player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Cinchcast')
+
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ webpage)
+ if not mobj:
+ mobj = re.search(
+ r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'MLB')
+
+ mobj = re.search(
+ r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ webpage)
+ if mobj is not None:
+ return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
+
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Livestream')
+
+ # Look for Zapiks embed
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Zapiks')
+
+ # Look for Kaltura embeds
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+
+ # Look for EaglePlatform embeds
+ eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+ if eagleplatform_url:
+ return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
+
+ # Look for ClipYou (uses EaglePlatform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
+
+ # Look for Videomore embeds
+ videomore_url = VideomoreIE._extract_url(webpage)
+ if videomore_url:
+ return self.url_result(videomore_url)
+
+ # Look for Webcaster embeds
+ webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+ if webcaster_url:
+ return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
+ # Look for Playwire embeds
+ mobj = re.search(
+ r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for Crooks and Liars embeds
+ mobj = re.search(
+ r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for NBC News embeds
+ nbc_news_embed_url = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
+ if nbc_news_embed_url:
+ return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
+
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
+ # Look for Senate ISVP iframe
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ return self.url_result(senate_isvp_url, 'SenateISVP')
+
+ # Look for OnionStudios embeds
+ onionstudios_url = OnionStudiosIE._extract_url(webpage)
+ if onionstudios_url:
+ return self.url_result(onionstudios_url)
+
+ # Look for ViewLift embeds
+ viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
+ if viewlift_url:
+ return self.url_result(viewlift_url)
+
+ # Look for JWPlatform embeds
+ jwplatform_urls = JWPlatformIE._extract_urls(webpage)
+ if jwplatform_urls:
+ return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
+
+ # Look for Digiteka embeds
+ digiteka_url = DigitekaIE._extract_url(webpage)
+ if digiteka_url:
+ return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
+
+ # Look for Arkena embeds
+ arkena_url = ArkenaIE._extract_url(webpage)
+ if arkena_url:
+ return self.url_result(arkena_url, ArkenaIE.ie_key())
+
+ # Look for Piksel embeds
+ piksel_url = PikselIE._extract_url(webpage)
+ if piksel_url:
+ return self.url_result(piksel_url, PikselIE.ie_key())
+
+ # Look for Limelight embeds
+ limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+ if limelight_urls:
+ return self.playlist_result(
+ limelight_urls, video_id, video_title, video_description)
+
+ # Look for Anvato embeds
+ anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
+ if anvato_urls:
+ return self.playlist_result(
+ anvato_urls, video_id, video_title, video_description)
+
+ # Look for AdobeTVVideo embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))),
+ 'AdobeTVVideo')
+
+ # Look for Vine embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+
+ # Look for VODPlatform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+ # Look for Mangomolo embeds
+ mobj = re.search(
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ (?:
+ video\?.*?\bid=(?P<video_id>\d+)|
+ index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ ).+?)\1''', webpage)
+ if mobj is not None:
+ info = {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+ video_id = mobj.group('video_id')
+ if video_id:
+ info.update({
+ 'ie_key': 'MangomoloVideo',
+ 'id': video_id,
+ })
+ else:
+ info.update({
+ 'ie_key': 'MangomoloLive',
+ 'id': mobj.group('channel_id'),
+ })
+ return info
+
+ # Look for Instagram embeds
+ instagram_embed_url = InstagramIE._extract_embed_url(webpage)
+ if instagram_embed_url is not None:
+ return self.url_result(
+ self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+ # Look for LiveLeak embeds
+ liveleak_urls = LiveLeakIE._extract_urls(webpage)
+ if liveleak_urls:
+ return self.playlist_from_matches(liveleak_urls, video_id, video_title)
+
+ # Look for 3Q SDN embeds
+ threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+ if threeqsdn_url:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': ThreeQSDNIE.ie_key(),
+ 'url': self._proto_relative_url(threeqsdn_url),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+
+ # Look for VBOX7 embeds
+ vbox7_url = Vbox7IE._extract_url(webpage)
+ if vbox7_url:
+ return self.url_result(vbox7_url, Vbox7IE.ie_key())
+
+ # Look for DBTV embeds
+ dbtv_urls = DBTVIE._extract_urls(webpage)
+ if dbtv_urls:
+ return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
+
+ # Look for Videa embeds
+ videa_urls = VideaIE._extract_urls(webpage)
+ if videa_urls:
+ return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
+
+ # Look for 20 minuten embeds
+ twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+ if twentymin_urls:
+ return self.playlist_from_matches(
+ twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
+
+ # Look for Openload embeds
+ openload_urls = OpenloadIE._extract_urls(webpage)
+ if openload_urls:
+ return self.playlist_from_matches(
+ openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
+
+ # Look for VideoPress embeds
+ videopress_urls = VideoPressIE._extract_urls(webpage)
+ if videopress_urls:
+ return self.playlist_from_matches(
+ videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+
+ # Look for Rutube embeds
+ rutube_urls = RutubeIE._extract_urls(webpage)
+ if rutube_urls:
+ return self.playlist_from_matches(
+ rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
+
+ # Look for WashingtonPost embeds
+ wapo_urls = WashingtonPostIE._extract_urls(webpage)
+ if wapo_urls:
+ return self.playlist_from_matches(
+ wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
+
+ # Look for Mediaset embeds
+ mediaset_urls = MediasetIE._extract_urls(webpage)
+ if mediaset_urls:
+ return self.playlist_from_matches(
+ mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
+
+ # Look for JOJ.sk embeds
+ joj_urls = JojIE._extract_urls(webpage)
+ if joj_urls:
+ return self.playlist_from_matches(
+ joj_urls, video_id, video_title, ie=JojIE.ie_key())
+
+ # Look for megaphone.fm embeds
+ mpfn_urls = MegaphoneIE._extract_urls(webpage)
+ if mpfn_urls:
+ return self.playlist_from_matches(
+ mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+ # Look for vzaar embeds
+ vzaar_urls = VzaarIE._extract_urls(webpage)
+ if vzaar_urls:
+ return self.playlist_from_matches(
+ vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
+ channel9_urls = Channel9IE._extract_urls(webpage)
+ if channel9_urls:
+ return self.playlist_from_matches(
+ channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
+ vshare_urls = VShareIE._extract_urls(webpage)
+ if vshare_urls:
+ return self.playlist_from_matches(
+ vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
+ # Look for Mediasite embeds
+ mediasite_urls = MediasiteIE._extract_urls(webpage)
+ if mediasite_urls:
+ entries = [
+ self.url_result(smuggle_url(
+ compat_urlparse.urljoin(url, mediasite_url),
+ {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+ for mediasite_url in mediasite_urls]
+ return self.playlist_result(entries, video_id, video_title)
+
+ springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
+ if springboardplatform_urls:
+ return self.playlist_from_matches(
+ springboardplatform_urls, video_id, video_title,
+ ie=SpringboardPlatformIE.ie_key())
+
+ yapfiles_urls = YapFilesIE._extract_urls(webpage)
+ if yapfiles_urls:
+ return self.playlist_from_matches(
+ yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+ vice_urls = ViceIE._extract_urls(webpage)
+ if vice_urls:
+ return self.playlist_from_matches(
+ vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
+ xfileshare_urls = XFileShareIE._extract_urls(webpage)
+ if xfileshare_urls:
+ return self.playlist_from_matches(
+ xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+ cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+ if cloudflarestream_urls:
+ return self.playlist_from_matches(
+ cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+ peertube_urls = PeerTubeIE._extract_urls(webpage, url)
+ if peertube_urls:
+ return self.playlist_from_matches(
+ peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+ indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+ if indavideo_urls:
+ return self.playlist_from_matches(
+ indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+ apa_urls = APAIE._extract_urls(webpage)
+ if apa_urls:
+ return self.playlist_from_matches(
+ apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
+ foxnews_urls = FoxNewsIE._extract_urls(webpage)
+ if foxnews_urls:
+ return self.playlist_from_matches(
+ foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
+ sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+ webpage)]
+ if sharevideos_urls:
+ return self.playlist_from_matches(
+ sharevideos_urls, video_id, video_title)
+
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ if len(entries) == 1:
+ entries[0].update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ else:
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': '%s-%s' % (video_id, num),
+ 'title': '%s (%d)' % (video_title, num),
+ })
+ for entry in entries:
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries, video_id, video_title)
+
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ if jwplayer_data:
+ info = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=url)
+ return merge_dicts(info, info_dict)
+
+ # Video.js embed
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ sources = self._parse_json(
+ mobj.group(1), video_id, transform_source=js_to_json,
+ fatal=False) or []
+ if not isinstance(sources, list):
+ sources = [sources]
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ src = compat_urlparse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, compat_str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return self.url_result(src, YoutubeIE.ie_key())
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ 'ext': (mimetype2ext(src_type) or
+ ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ return merge_dicts(json_ld, info_dict)
+
+ def check_video(vurl):
+ if YoutubeIE.suitable(vurl):
+ return True
+ if RtmpIE.suitable(vurl):
+ return True
+ vpath = compat_urlparse.urlparse(vurl).path
+ vext = determine_ext(vpath)
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
+
+ def filter_video(urls):
+ return list(filter(check_video, urls))
+
+ # Start with something easy: JW Player in SWFObject
+ found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
+ if not found:
+ # Look for gorilla-vid style embedding
+ found = filter_video(re.findall(r'''(?sx)
+ (?:
+ jw_plugins|
+ JWPlayerOptions|
+ jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+ )
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+ if not found:
+ # Broaden the search a little bit
+ found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = filter_video(re.findall(
+ r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+ if not found:
+ # Flow player
+ found = filter_video(re.findall(r'''(?xs)
+ flowplayer\("[^"]+",\s*
+ \{[^}]+?\}\s*,
+ \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ ["']?url["']?\s*:\s*["']([^"']+)["']
+ ''', webpage))
+ if not found:
+ # Cinerama player
+ found = re.findall(
+ r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
+ if not found:
+ # Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ found = filter_video(re.findall(
+ r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
+ if not found:
+ # We look for Open Graph info:
+ # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
+ m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+ if m_video_type is not None:
+ found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
+ if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+ found = re.search(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ # In python 2 response HTTP headers are bytestrings
+ if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ refresh_header = refresh_header.decode('iso-8859-1')
+ found = re.search(REDIRECT_REGEX, refresh_header)
+ if found:
+ new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ if new_url != url:
+ self.report_following_redirect(new_url)
+ return {
+ '_type': 'url',
+ 'url': new_url,
+ }
+ else:
+ found = None
+
+ if not found:
+ # twitter:player is a https URL to iframe player that may or may not
+ # be supported by youtube-dl thus this is checked the very last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url and embed_url != url:
+ return self.url_result(embed_url)
+
+ if not found:
+ raise UnsupportedError(url)
+
+ entries = []
+ for video_url in orderedSet(found):
+ video_url = unescapeHTML(video_url)
+ video_url = video_url.replace('\\/', '/')
+ video_url = compat_urlparse.urljoin(url, video_url)
+ video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
+
+ # here's a fun little line of code for you:
+ video_id = os.path.splitext(video_id)[0]
+
+ entry_info_dict = {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ }
+
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
+ ext = determine_ext(video_url)
+ if ext == 'smil':
+ entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+ elif ext == 'xspf':
+ return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ elif ext == 'm3u8':
+ entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ elif ext == 'mpd':
+ entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+ elif ext == 'f4m':
+ entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
+ else:
+ entry_info_dict['url'] = video_url
+
+ if entry_info_dict.get('formats'):
+ self._sort_formats(entry_info_dict['formats'])
+
+ entries.append(entry_info_dict)
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for num, e in enumerate(entries, start=1):
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
new file mode 100644
index 0000000..d264fe2
--- /dev/null
+++ b/youtube_dl/extractor/openload.py
@@ -0,0 +1,379 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import os
+import re
+import subprocess
+import tempfile
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_kwargs,
+)
+from ..utils import (
+ check_executable,
+ determine_ext,
+ encodeArgument,
+ ExtractorError,
+ get_element_by_id,
+ get_exe_version,
+ is_outdated_version,
+ std_headers,
+)
+
+
+def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ try:
+ if (cookie.has_nonstandard_attr('httpOnly') or
+ cookie.has_nonstandard_attr('httponly') or
+ cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ except TypeError:
+ pass
+ return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper(object):
+ """PhantomJS wrapper class
+
+ This class is experimental.
+ """
+
+ _TEMPLATE = r'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self._TMP_FILES = {}
+
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError('PhantomJS executable not found in PATH, '
+ 'download it from http://phantomjs.org',
+ expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ self.options = {
+ 'timeout': timeout,
+ }
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ try:
+ os.remove(self._TMP_FILES[name].name)
+ except (IOError, OSError, KeyError):
+ pass
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
+ for cookie in cookies:
+ if cookie['httponly'] is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**compat_kwargs(cookie))
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.execute`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
+ It is possible to wait for some element on the webpage, for example:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ replaces = self.options
+ replaces['url'] = url
+ user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ replaces['ua'] = user_agent.replace('"', '\\"')
+ replaces['jscode'] = jscode
+
+ for x in self._TMP_FILE_NAMES:
+ replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+ with open(self._TMP_FILES['script'].name, 'wb') as f:
+ f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+ if video_id is None:
+ self.extractor.to_screen('%s' % (note2,))
+ else:
+ self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+ p = subprocess.Popen([
+ self.exe, '--ssl-protocol=any',
+ self._TMP_FILES['script'].name
+ ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ raise ExtractorError(
+ 'Executing JS failed\n:' + encodeArgument(err))
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+
+ self._load_cookies()
+
+ return (html, encodeArgument(out))
+
+
+class OpenloadIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+
+ _TESTS = [{
+ 'url': 'https://openload.co/f/kUEfGclsU9o',
+ 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
+ 'info_dict': {
+ 'id': 'kUEfGclsU9o',
+ 'ext': 'mp4',
+ 'title': 'skyrim_no-audio_1080.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://openload.co/embed/rjC09fkPLYs',
+ 'info_dict': {
+ 'id': 'rjC09fkPLYs',
+ 'ext': 'mp4',
+ 'title': 'movie.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'vtt',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True, # test subtitles only
+ },
+ }, {
+ 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://openload.io/f/ZAn6oz-VZGE/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://openload.co/f/_-ztPaZtMhM/',
+ 'only_matching': True,
+ }, {
+ # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
+ # for title and ext
+ 'url': 'https://openload.co/embed/Sxz5sADo82g/',
+ 'only_matching': True,
+ }, {
+ # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
+ # via https://openload.co/f/e-Ixz9ZR5L0/
+ 'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.openload.link/f/KnG-kKZdcfY',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.stream/f/KnG-kKZdcfY',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.win/f/kUEfGclsU9o',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.download/f/kUEfGclsU9o',
+ 'only_matching': True,
+ }, {
+ # Its title has not got its extension but url has it
+ 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
+ 'only_matching': True,
+ }]
+
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url_pattern = 'https://openload.co/%%s/%s/' % video_id
+ headers = {
+ 'User-Agent': self._USER_AGENT,
+ }
+
+ for path in ('embed', 'f'):
+ page_url = url_pattern % path
+ last = path == 'f'
+ webpage = self._download_webpage(
+ page_url, video_id, 'Downloading %s webpage' % path,
+ headers=headers, fatal=last)
+ if not webpage:
+ continue
+ if 'File not found' in webpage or 'deleted by the owner' in webpage:
+ if not last:
+ continue
+ raise ExtractorError('File not found', expected=True, video_id=video_id)
+ break
+
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
+
+ decoded_id = (get_element_by_id('streamurl', webpage) or
+ get_element_by_id('streamuri', webpage) or
+ get_element_by_id('streamurj', webpage) or
+ self._search_regex(
+ (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+ r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+ 'stream URL'))
+
+ video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
+
+ title = self._og_search_title(webpage, default=None) or self._search_regex(
+ r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
+ 'title', default=None) or self._html_search_meta(
+ 'description', webpage, 'title', fatal=True)
+
+ entries = self._parse_html5_media_entries(page_url, webpage, video_id)
+ entry = entries[0] if entries else {}
+ subtitles = entry.get('subtitles')
+
+ info_dict = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
+ 'url': video_url,
+ 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
+ 'subtitles': subtitles,
+ 'http_headers': headers,
+ }
+ return info_dict
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
new file mode 100644
index 0000000..772f100
--- /dev/null
+++ b/youtube_dl/extractor/youtube.py
@@ -0,0 +1,2914 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+
+import itertools
+import json
+import os.path
+import random
+import re
+import time
+import traceback
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
+from ..compat import (
+ compat_chr,
+ compat_kwargs,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ error_to_compat_str,
+ ExtractorError,
+ float_or_none,
+ get_element_by_attribute,
+ get_element_by_id,
+ int_or_none,
+ mimetype2ext,
+ orderedSet,
+ parse_codecs,
+ parse_duration,
+ qualities,
+ remove_quotes,
+ remove_start,
+ smuggle_url,
+ str_to_int,
+ try_get,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ uppercase_escape,
+ urlencode_postdata,
+)
+
+
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+ _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+ _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
+
+ _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
+ _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
+ _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+
+ _NETRC_MACHINE = 'youtube'
+ # If True it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+
+ def _set_language(self):
+ self._set_cookie(
+ '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+ # YouTube sets the expire time to about two months
+ expire_time=time.time() + 2 * 30 * 24 * 3600)
+
+ def _ids_to_results(self, ids):
+ return [
+ self.url_result(vid_id, 'Youtube', video_id=vid_id)
+ for vid_id in ids]
+
+ def _login(self):
+ """
+ Attempt to log in to YouTube.
+ True is returned if successful or skipped.
+ False is returned if login failed.
+
+ If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
+ """
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if username is None:
+ if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return True
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ note='Downloading login page',
+ errnote='unable to fetch login page', fatal=False)
+ if login_page is False:
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ def req(url, f_req, note, errnote):
+ data = login_form.copy()
+ data.update({
+ 'pstMsg': 1,
+ 'checkConnection': 'youtube',
+ 'checkedDomains': 'youtube',
+ 'hl': 'en',
+ 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
+ 'f.req': json.dumps(f_req),
+ 'flowName': 'GlifWebSignIn',
+ 'flowEntry': 'ServiceLogin',
+ })
+ return self._download_json(
+ url, None, note=note, errnote=errnote,
+ transform_source=lambda s: re.sub(r'^[^[]*', '', s),
+ fatal=False,
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
+ 'Google-Accounts-XSRF': 1,
+ })
+
+ def warn(message):
+ self._downloader.report_warning(message)
+
+ lookup_req = [
+ username,
+ None, [], None, 'US', None, None, 2, False, True,
+ [
+ None, None,
+ [2, 1, None, 1,
+ 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
+ None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ],
+ username,
+ ]
+
+ lookup_results = req(
+ self._LOOKUP_URL, lookup_req,
+ 'Looking up account info', 'Unable to look up account info')
+
+ if lookup_results is False:
+ return False
+
+ user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
+ if not user_hash:
+ warn('Unable to extract user hash')
+ return False
+
+ challenge_req = [
+ user_hash,
+ None, 1, None, [1, None, None, None, [password, None, True]],
+ [
+ None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ]]
+
+ challenge_results = req(
+ self._CHALLENGE_URL, challenge_req,
+ 'Logging in', 'Unable to log in')
+
+ if challenge_results is False:
+ return
+
+ login_res = try_get(challenge_results, lambda x: x[0][5], list)
+ if login_res:
+ login_msg = try_get(login_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to login: %s' % 'Invalid password'
+ if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
+ return False
+
+ res = try_get(challenge_results, lambda x: x[0][-1], list)
+ if not res:
+ warn('Unable to extract result entry')
+ return False
+
+ login_challenge = try_get(res, lambda x: x[0][0], list)
+ if login_challenge:
+ challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+ if challenge_str == 'TWO_STEP_VERIFICATION':
+ # SEND_SUCCESS - TFA code has been successfully sent to phone
+ # QUOTA_EXCEEDED - reached the limit of TFA codes
+ status = try_get(login_challenge, lambda x: x[5], compat_str)
+ if status == 'QUOTA_EXCEEDED':
+ warn('Exceeded the limit of TFA codes, try later')
+ return False
+
+ tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
+ if not tl:
+ warn('Unable to extract TL')
+ return False
+
+ tfa_code = self._get_tfa_info('2-step verification code')
+
+ if not tfa_code:
+ warn(
+ 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
+ '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+ return False
+
+ tfa_code = remove_start(tfa_code, 'G-')
+
+ tfa_req = [
+ user_hash, None, 2, None,
+ [
+ 9, None, None, None, None, None, None, None,
+ [None, tfa_code, True, 2]
+ ]]
+
+ tfa_results = req(
+ self._TFA_URL.format(tl), tfa_req,
+ 'Submitting TFA code', 'Unable to submit TFA code')
+
+ if tfa_results is False:
+ return False
+
+ tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
+ if tfa_res:
+ tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to finish TFA: %s' % 'Invalid TFA code'
+ if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
+ return False
+
+ check_cookie_url = try_get(
+ tfa_results, lambda x: x[0][-1][2], compat_str)
+ else:
+ CHALLENGES = {
+ 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+ 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+ 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+ }
+ challenge = CHALLENGES.get(
+ challenge_str,
+ '%s returned error %s.' % (self.IE_NAME, challenge_str))
+ warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+ return False
+ else:
+ check_cookie_url = try_get(res, lambda x: x[2], compat_str)
+
+ if not check_cookie_url:
+ warn('Unable to extract CheckCookie URL')
+ return False
+
+ check_cookie_results = self._download_webpage(
+ check_cookie_url, None, 'Checking cookie', fatal=False)
+
+ if check_cookie_results is False:
+ return False
+
+ if 'https://myaccount.google.com/' not in check_cookie_results:
+ warn('Unable to log in')
+ return False
+
+ return True
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+
+ def _real_initialize(self):
+ if self._downloader is None:
+ return
+ self._set_language()
+ if not self._login():
+ return
+
+
+class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ # Extract entries from page with "Load more" button
+ def _entries(self, page, playlist_id):
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ for entry in self._process_page(content_html):
+ yield entry
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ if not content_html.strip():
+ # Some webpages show a "Load more" button but they don't
+ # have more videos
+ break
+ more_widget_html = more['load_more_widget_html']
+
+
+class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
+ def _process_page(self, content):
+ for video_id, video_title in self.extract_videos_from_page(content):
+ yield self.url_result(video_id, 'Youtube', video_id, video_title)
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+ for mobj in re.finditer(self._VIDEO_RE, page):
+ # The link with index 0 is not the first video of the playlist (not sure if still actual)
+ if 'index' in mobj.groupdict() and mobj.group('id') == '0':
+ continue
+ video_id = mobj.group('id')
+ video_title = unescapeHTML(mobj.group('title'))
+ if video_title:
+ video_title = video_title.strip()
+ try:
+ idx = ids_in_page.index(video_id)
+ if video_title and not titles_in_page[idx]:
+ titles_in_page[idx] = video_title
+ except ValueError:
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+ return zip(ids_in_page, titles_in_page)
+
+
+class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
+ def _process_page(self, content):
+ for playlist_id in orderedSet(re.findall(
+ r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
+ content)):
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ title = self._og_search_title(webpage, fatal=False)
+ return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
+
+
+class YoutubeIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com'
+ _VALID_URL = r"""(?x)^
+ (
+ (?:https?://|//) # http(s):// or protocol-independent URL
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
+ (?:www\.)?deturl\.com/www\.youtube\.com/|
+ (?:www\.)?pwnyoutube\.com/|
+ (?:www\.)?hooktube\.com/|
+ (?:www\.)?yourepeat\.com/|
+ tube\.majestyc\.net/|
+ youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
+ (?:.*?\#/)? # handle anchor (#/) redirect urls
+ (?: # the various things that can precede the ID:
+ (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
+ |(?: # or the v= param in all its forms
+ (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:\?|\#!?) # the params delimiter ? or # or #!
+ (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
+ v=
+ )
+ ))
+ |(?:
+ youtu\.be| # just youtu.be/xxxx
+ vid\.plus| # or vid.plus/xxxx
+ zwearz\.com/watch| # or zwearz.com/watch/xxxx
+ )/
+ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+ )
+ )? # all until now is optional -> you can pass the naked ID
+ ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
+ (?!.*?\blist=
+ (?:
+ %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
+ WL # WL are handled by the watch later IE
+ )
+ )
+ (?(1).+)? # if we found the ID, everything can follow
+ $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
+ _formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+ }
+ _SUBTITLE_FORMATS = ('ttml', 'vtt')
+
+ _GEO_BYPASS = False
+
+ IE_NAME = 'youtube'
+ _TESTS = [
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'upload_date': '20121002',
+ 'license': 'Standard YouTube License',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'start_time': 1,
+ 'end_time': 9,
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
+ 'note': 'Test generic use_cipher_signature video (#897)',
+ 'info_dict': {
+ 'id': 'UxxajLWwzqY',
+ 'ext': 'mp4',
+ 'upload_date': '20120506',
+ 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+ 'alt_title': 'I Love It (feat. Charli XCX)',
+ 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
+ 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
+ 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
+ 'iconic ep', 'iconic', 'love', 'it'],
+ 'duration': 180,
+ 'uploader': 'Icona Pop',
+ 'uploader_id': 'IconaPop',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
+ 'license': 'Standard YouTube License',
+ 'creator': 'Icona Pop',
+ 'track': 'I Love It (feat. Charli XCX)',
+ 'artist': 'Icona Pop',
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
+ 'note': 'Test VEVO video with age protection (#956)',
+ 'info_dict': {
+ 'id': '07FYdnEawAQ',
+ 'ext': 'mp4',
+ 'upload_date': '20130703',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'alt_title': 'Tunnel Vision',
+ 'description': 'md5:64249768eec3bc4276236606ea996373',
+ 'duration': 419,
+ 'uploader': 'justintimberlakeVEVO',
+ 'uploader_id': 'justintimberlakeVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
+ 'license': 'Standard YouTube License',
+ 'creator': 'Justin Timberlake',
+ 'track': 'Tunnel Vision',
+ 'artist': 'Justin Timberlake',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+ 'note': 'Embed-only video (#1746)',
+ 'info_dict': {
+ 'id': 'yZIXLfi8CZQ',
+ 'ext': 'mp4',
+ 'upload_date': '20120608',
+ 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+ 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+ 'uploader': 'SET India',
+ 'uploader_id': 'setindia',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
+ 'license': 'Standard YouTube License',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
+ 'note': 'Use the first video ID in the URL',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'upload_date': '20121002',
+ 'license': 'Standard YouTube License',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
+ 'note': '256k DASH audio (format 141) via DASH manifest',
+ 'info_dict': {
+ 'id': 'a9LDPn-MO4I',
+ 'ext': 'm4a',
+ 'upload_date': '20121002',
+ 'uploader_id': '8KVIDEO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
+ 'description': '',
+ 'uploader': '8KVIDEO',
+ 'license': 'Standard YouTube License',
+ 'title': 'UHDTV TEST 8K VIDEO.mp4'
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141',
+ },
+ 'skip': 'format 141 not served anymore',
+ },
+ # DASH manifest with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ 'info_dict': {
+ 'id': 'IB3lcPjvWLA',
+ 'ext': 'm4a',
+ 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
+ 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
+ 'duration': 244,
+ 'uploader': 'AfrojackVEVO',
+ 'uploader_id': 'AfrojackVEVO',
+ 'upload_date': '20131011',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # JS player signature function name containing $
+ {
+ 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
+ 'info_dict': {
+ 'id': 'nfWlot6h_JM',
+ 'ext': 'm4a',
+ 'title': 'Taylor Swift - Shake It Off',
+ 'alt_title': 'Shake It Off',
+ 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
+ 'duration': 242,
+ 'uploader': 'TaylorSwiftVEVO',
+ 'uploader_id': 'TaylorSwiftVEVO',
+ 'upload_date': '20140818',
+ 'license': 'Standard YouTube License',
+ 'creator': 'Taylor Swift',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # Controversy video
+ {
+ 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+ 'info_dict': {
+ 'id': 'T4XJQO3qol8',
+ 'ext': 'mp4',
+ 'duration': 219,
+ 'upload_date': '20100909',
+ 'uploader': 'TJ Kirk',
+ 'uploader_id': 'TheAmazingAtheist',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+ 'license': 'Standard YouTube License',
+ 'title': 'Burning Everyone\'s Koran',
+ 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+ }
+ },
+ # Normal age-gate video (No vevo, embed allowed)
+ {
+ 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
+ 'uploader': 'The Witcher',
+ 'uploader_id': 'WitcherGame',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+ 'upload_date': '20140605',
+ 'license': 'Standard YouTube License',
+ 'age_limit': 18,
+ },
+ },
+ # Age-gate video with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
+ 'info_dict': {
+ 'id': '6kLq3WMV1nU',
+ 'ext': 'webm',
+ 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
+ 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
+ 'duration': 246,
+ 'uploader': 'LloydVEVO',
+ 'uploader_id': 'LloydVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
+ 'upload_date': '20110629',
+ 'license': 'Standard YouTube License',
+ 'age_limit': 18,
+ },
+ },
+ # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'creator': 'deadmau5',
+ 'description': 'md5:12c56784b8032162bb936a5f76d55360',
+ 'uploader': 'deadmau5',
+ 'license': 'Standard YouTube License',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'Some Chords',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'duration': 6085,
+ 'upload_date': '20150827',
+ 'uploader_id': 'olympic',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+ 'license': 'Standard YouTube License',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympic',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
+ # Non-square pixels
+ {
+ 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+ 'info_dict': {
+ 'id': '_b-2C3KPAM0',
+ 'ext': 'mp4',
+ 'stretched_ratio': 16 / 9.,
+ 'duration': 85,
+ 'upload_date': '20110310',
+ 'uploader_id': 'AllenMeow',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+ 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+ 'uploader': '孫ᄋᄅ',
+ 'license': 'Standard YouTube License',
+ 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+ },
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'webm',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ },
+ 'skip': 'This live event has ended.',
+ },
+ # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'webm',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
+ 'upload_date': '20150625',
+ 'uploader_id': 'dorappi2000',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
+ 'uploader': 'dorappi2000',
+ 'license': 'Standard YouTube License',
+ 'formats': 'mincount:31',
+ },
+ 'skip': 'not actual anymore',
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'license': 'Standard YouTube License',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ },
+ 'skip': 'This live event has ended.',
+ },
+ {
+ # Multifeed videos (multiple cameras), URL is for Main Camera
+ 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'title': 'teamPGP: Rocket League Noob Stream',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7335,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6h8e8xoXJzg',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'PUOgX5z9xZw',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'teuwxikvS5k',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (zim)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7334,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Not multifeed anymore',
+ },
+ {
+ 'url': 'https://vid.plus/FlRa-iH7PGw',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+ 'only_matching': True,
+ },
+ {
+ # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/rg3/youtube-dl/issues/1892,
+ # https://github.com/rg3/youtube-dl/issues/8164)
+ 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+ 'info_dict': {
+ 'id': 'lsguqyKfVQg',
+ 'ext': 'mp4',
+ 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk - Position Music',
+ 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
+ 'upload_date': '20151119',
+ 'uploader_id': 'IronSoulElf',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+ 'uploader': 'IronSoulElf',
+ 'license': 'Standard YouTube License',
+ 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk - Position Music',
+ 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
+ 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
+ 'only_matching': True,
+ },
+ {
+ # Video with yt:stretch=17:0
+ 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
+ 'info_dict': {
+ 'id': 'Q39EVAstoRM',
+ 'ext': 'mp4',
+ 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
+ 'description': 'md5:ee18a25c350637c8faff806845bddee9',
+ 'upload_date': '20151107',
+ 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
+ 'uploader': 'CH GAMER DROID',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video does not exist.',
+ },
+ {
+ # Video licensed under Creative Commons
+ 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+ 'info_dict': {
+ 'id': 'M4gD1WSo5mA',
+ 'ext': 'mp4',
+ 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+ 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
+ 'upload_date': '20150127',
+ 'uploader_id': 'BerkmanCenter',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Channel-like uploader_url
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+ 'duration': 4060,
+ 'upload_date': '20151119',
+ 'uploader': 'Bernie Sanders',
+ 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
+ 'only_matching': True,
+ },
+ {
+ # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
+ 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
+ 'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'uploader': 'FlixMatrix',
+ 'uploader_id': 'FlixMatrixKaravan',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'uploader': 'Vsauce',
+ 'uploader_id': 'Vsauce',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+ 'license': 'Standard YouTube License',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ },
+ ]
+
+ def __init__(self, *args, **kwargs):
+ super(YoutubeIE, self).__init__(*args, **kwargs)
+ self._player_cache = {}
+
+ def report_video_info_webpage_download(self, video_id):
+ """Report attempt to download video info webpage."""
+ self.to_screen('%s: Downloading video info webpage' % video_id)
+
+ def report_information_extraction(self, video_id):
+ """Report attempt to extract video information."""
+ self.to_screen('%s: Extracting video information' % video_id)
+
+ def report_unavailable_format(self, video_id, format):
+ """Report extracted video URL."""
+ self.to_screen('%s: Format %s not available' % (video_id, format))
+
+ def report_rtmp_download(self):
+ """Indicate the download will use the RTMP protocol."""
+ self.to_screen('RTMP download detected')
+
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ id_m = re.match(
+ r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+ player_url)
+ if not id_m:
+ raise ExtractorError('Cannot identify player %r' % player_url)
+ player_type = id_m.group('ext')
+ player_id = id_m.group('id')
+
+ # Read from filesystem cache
+ func_id = '%s_%s_%s' % (
+ player_type, player_id, self._signature_cache_id(example_sig))
+ assert os.path.basename(func_id) == func_id
+
+ cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ if cache_spec is not None:
+ return lambda s: ''.join(s[i] for i in cache_spec)
+
+ download_note = (
+ 'Downloading player %s' % player_url
+ if self._downloader.params.get('verbose') else
+ 'Downloading %s player %s' % (player_type, player_id)
+ )
+ if player_type == 'js':
+ code = self._download_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ res = self._parse_sig_js(code)
+ elif player_type == 'swf':
+ urlh = self._request_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ code = urlh.read()
+ res = self._parse_sig_swf(code)
+ else:
+ assert False, 'Invalid player type %r' % player_type
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+
+ self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ return res
+
+ def _print_sig_code(self, func, example_sig):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = '' if start == 0 else str(start)
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
+ steps = '' if step == 1 else (':%d' % step)
+ return 's[%s%s%s]' % (starts, ends, steps)
+
+ step = None
+ # Quelch pyflakes warnings - start will be set when step is set
+ start = '(Never used)'
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield 's[%d]' % prev
+ if step is None:
+ yield 's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = func(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = ' + '.join(gen_sig_code(cache_spec))
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ ' return %s\n') % (signature_id_tuple, expr_code)
+ self.to_screen('Extracted signature function:\n' + code)
+
+ def _parse_sig_js(self, jscode):
+ funcname = self._search_regex(
+ (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _parse_sig_swf(self, file_contents):
+ swfi = SWFInterpreter(file_contents)
+ TARGET_CLASSNAME = 'SignatureDecipher'
+ searched_class = swfi.extract_class(TARGET_CLASSNAME)
+ initial_function = swfi.extract_function(searched_class, 'decipher')
+ return lambda s: initial_function([s])
+
+ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
+ """Turn the encrypted s field into a working signature"""
+
+ if player_url is None:
+ raise ExtractorError('Cannot decrypt signature without player_url')
+
+ if player_url.startswith('//'):
+ player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
+ try:
+ player_id = (player_url, self._signature_cache_id(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, s
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, s)
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ 'Signature extraction failed: ' + tb, cause=e)
+
+ def _get_subtitles(self, video_id, webpage):
+ try:
+ subs_doc = self._download_xml(
+ 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+ video_id, note=False)
+ except ExtractorError as err:
+ self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
+ return {}
+
+ sub_lang_list = {}
+ for track in subs_doc.findall('track'):
+ lang = track.attrib['lang_code']
+ if lang in sub_lang_list:
+ continue
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
+ if not sub_lang_list:
+ self._downloader.report_warning('video doesn\'t have subtitles')
+ return {}
+ return sub_lang_list
+
+ def _get_ytplayer_config(self, video_id, webpage):
+ patterns = (
+ # User data may contain arbitrary character sequences that may affect
+ # JSON extraction with regex, e.g. when '};' is contained the second
+ # regex won't capture the whole JSON. Yet working around by trying more
+ # concrete regex first keeping in mind proper quoted string handling
+ # to be implemented in future that will replace this workaround (see
+ # https://github.com/rg3/youtube-dl/issues/7468,
+ # https://github.com/rg3/youtube-dl/pull/7599)
+ r';ytplayer\.config\s*=\s*({.+?});ytplayer',
+ r';ytplayer\.config\s*=\s*({.+?});',
+ )
+ config = self._search_regex(
+ patterns, webpage, 'ytplayer.config', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
+ def _get_automatic_captions(self, video_id, webpage):
+ """We need the webpage for getting the captions url, pass it as an
+ argument to speed up the process."""
+ self.to_screen('%s: Looking for automatic captions' % video_id)
+ player_config = self._get_ytplayer_config(video_id, webpage)
+ err_msg = 'Couldn\'t find automatic captions for %s' % video_id
+ if not player_config:
+ self._downloader.report_warning(err_msg)
+ return {}
+ try:
+ args = player_config['args']
+ caption_url = args.get('ttsurl')
+ if caption_url:
+ timestamp = args['timestamp']
+ # We get the available subtitles
+ list_params = compat_urllib_parse_urlencode({
+ 'type': 'list',
+ 'tlangs': 1,
+ 'asrs': 1,
+ })
+ list_url = caption_url + '&' + list_params
+ caption_list = self._download_xml(list_url, video_id)
+ original_lang_node = caption_list.find('track')
+ if original_lang_node is None:
+ self._downloader.report_warning('Video doesn\'t have automatic captions')
+ return {}
+ original_lang = original_lang_node.attrib['lang_code']
+ caption_kind = original_lang_node.attrib.get('kind', '')
+
+ sub_lang_list = {}
+ for lang_node in caption_list.findall('target'):
+ sub_lang = lang_node.attrib['lang_code']
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
+ return sub_lang_list
+
+ def make_captions(sub_url, sub_langs):
+ parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
+ caption_qs = compat_parse_qs(parsed_sub_url.query)
+ captions = {}
+ for sub_lang in sub_langs:
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ caption_qs.update({
+ 'tlang': [sub_lang],
+ 'fmt': [ext],
+ })
+ sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
+ sub_formats.append({
+ 'url': sub_url,
+ 'ext': ext,
+ })
+ captions[sub_lang] = sub_formats
+ return captions
+
+ # New captions format as of 22.06.2017
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
+ # Some videos don't provide ttsurl but rather caption_tracks and
+ # caption_translation_languages (e.g. 20LmZk1hakA)
+ # Does not used anymore as of 22.06.2017
+ caption_tracks = args['caption_tracks']
+ caption_translation_languages = args['caption_translation_languages']
+ caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+ sub_lang_list = []
+ for lang in caption_translation_languages.split(','):
+ lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+ sub_lang = lang_qs.get('lc', [None])[0]
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
+ # An extractor error can be raise by the download process if there are
+ # no automatic captions but there are subtitles
+ except (KeyError, IndexError, ExtractorError):
+ self._downloader.report_warning(err_msg)
+ return {}
+
+ def _mark_watched(self, video_id, video_info):
+ playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+ if not playback_url:
+ return
+ parsed_playback_url = compat_urlparse.urlparse(playback_url)
+ qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+ # cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ })
+ playback_url = compat_urlparse.urlunparse(
+ parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ self._download_webpage(
+ playback_url, video_id, 'Marking watched',
+ 'Unable to mark watched', fatal=False)
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def extract_id(cls, url):
+ mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ video_id = mobj.group(2)
+ return video_id
+
+ def _extract_annotations(self, video_id):
+ url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
+ return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+
+ @staticmethod
+ def _extract_chapters(description, duration):
+ if not description:
+ return None
+ chapter_lines = re.findall(
+ r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+ description)
+ if not chapter_lines:
+ return None
+ chapters = []
+ for next_num, (chapter_line, time_point) in enumerate(
+ chapter_lines, start=1):
+ start_time = parse_duration(time_point)
+ if start_time is None:
+ continue
+ if start_time > duration:
+ break
+ end_time = (duration if next_num == len(chapter_lines)
+ else parse_duration(chapter_lines[next_num][1]))
+ if end_time is None:
+ continue
+ if end_time > duration:
+ end_time = duration
+ if start_time > end_time:
+ break
+ chapter_title = re.sub(
+ r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
+ chapter_title = re.sub(r'\s+', ' ', chapter_title)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': chapter_title,
+ })
+ return chapters
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ proto = (
+ 'http' if self._downloader.params.get('prefer_insecure', False)
+ else 'https')
+
+ start_time = None
+ end_time = None
+ parsed_url = compat_urllib_parse_urlparse(url)
+ for component in [parsed_url.fragment, parsed_url.query]:
+ query = compat_parse_qs(component)
+ if start_time is None and 't' in query:
+ start_time = parse_duration(query['t'][0])
+ if start_time is None and 'start' in query:
+ start_time = parse_duration(query['start'][0])
+ if end_time is None and 'end' in query:
+ end_time = parse_duration(query['end'][0])
+
+ # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
+ video_id = self.extract_id(url)
+
+ # Get video webpage
+ url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
+ video_webpage = self._download_webpage(url, video_id)
+
+ # Attempt to extract SWF player URL
+ mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
+ if mobj is not None:
+ player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
+ else:
+ player_url = None
+
+ dash_mpds = []
+
+ def add_dash_mpd(video_info):
+ dash_mpd = video_info.get('dashmpd')
+ if dash_mpd and dash_mpd[0] not in dash_mpds:
+ dash_mpds.append(dash_mpd[0])
+
+ is_live = None
+ view_count = None
+
+ def extract_view_count(v_info):
+ return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
+ related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='')
+
+ if related_vid_info == '':
+ related_vids = []
+ else:
+ related_vid_info = json.loads(related_vid_info)['rvs']
+ if related_vid_info == '':
+ related_vids = []
+ else:
+ related_vids = (compat_parse_qs(related_item) for related_item in related_vid_info.split(","))
+ related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vids]
+
+ # Get video info
+ embed_webpage = None
+ if re.search(r'player-age-gate-content">', video_webpage) is not None:
+ age_gate = True
+ # We simulate the access to the video from www.youtube.com/v/{video_id}
+ # this can be viewed without login into Youtube
+ url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
+ data = compat_urllib_parse_urlencode({
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ 'sts': self._search_regex(
+ r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
+ })
+ video_info_url = proto + '://www.youtube.com/get_video_info?' + data
+ video_info_webpage = self._download_webpage(
+ video_info_url, video_id,
+ note='Refetching age-gated info webpage',
+ errnote='unable to download video info webpage')
+ video_info = compat_parse_qs(video_info_webpage)
+ add_dash_mpd(video_info)
+ else:
+ age_gate = False
+ video_info = None
+ sts = None
+ # Try looking directly into the video webpage
+ ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
+ if ytplayer_config:
+ args = ytplayer_config['args']
+ if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
+ # Convert to the same format returned by compat_parse_qs
+ video_info = dict((k, [v]) for k, v in args.items())
+ add_dash_mpd(video_info)
+ # Rental video is not rented but preview is available (e.g.
+ # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
+ # https://github.com/rg3/youtube-dl/issues/10532)
+ if not video_info and args.get('ypc_vid'):
+ return self.url_result(
+ args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
+ if args.get('livestream') == '1' or args.get('live_playback') == 1:
+ is_live = True
+ sts = ytplayer_config.get('sts')
+ if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+ # We also try looking in get_video_info since it may contain different dashmpd
+ # URL that points to a DASH manifest with possibly different itag set (some itags
+ # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
+ # manifest pointed by get_video_info's dashmpd).
+ # The general idea is to take a union of itags of both DASH manifests (for example
+ # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
+ self.report_video_info_webpage_download(video_id)
+ for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+ query = {
+ 'video_id': video_id,
+ 'ps': 'default',
+ 'eurl': '',
+ 'gl': 'US',
+ 'hl': 'en',
+ }
+ if el:
+ query['el'] = el
+ if sts:
+ query['sts'] = sts
+ video_info_webpage = self._download_webpage(
+ '%s://www.youtube.com/get_video_info' % proto,
+ video_id, note=False,
+ errnote='unable to download video info webpage',
+ fatal=False, query=query)
+ if not video_info_webpage:
+ continue
+ get_video_info = compat_parse_qs(video_info_webpage)
+ add_dash_mpd(get_video_info)
+ if view_count is None:
+ view_count = extract_view_count(get_video_info)
+ if not video_info:
+ video_info = get_video_info
+ if 'token' in get_video_info:
+ # Different get_video_info requests may report different results, e.g.
+ # some may report video unavailability, but some may serve it without
+ # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
+ # the original webpage as well as el=info and el=embedded get_video_info
+ # requests report video unavailability due to geo restriction while
+ # el=detailpage succeeds and returns valid data). This is probably
+ # due to YouTube measures against IP ranges of hosting providers.
+ # Working around by preferring the first succeeded video_info containing
+ # the token if no such video_info yet was found.
+ if 'token' not in video_info:
+ video_info = get_video_info
+ break
+
+ def extract_unavailable_message():
+ return self._html_search_regex(
+ r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+ video_webpage, 'unavailable message', default=None)
+
+ if 'token' not in video_info:
+ if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
+ raise ExtractorError(
+ 'YouTube said: %s' % reason,
+ expected=True, video_id=video_id)
+ else:
+ raise ExtractorError(
+ '"token" parameter not in video info for unknown reason',
+ video_id=video_id)
+
+ # title
+ if 'title' in video_info:
+ video_title = video_info['title'][0]
+ else:
+ self._downloader.report_warning('Unable to extract video title')
+ video_title = '_'
+
+ # description
+ description_original = video_description = get_element_by_id("eow-description", video_webpage)
+ if video_description:
+
+ def replace_url(m):
+ redir_url = compat_urlparse.urljoin(url, m.group(1))
+ parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+ if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+ qs = compat_parse_qs(parsed_redir_url.query)
+ q = qs.get('q')
+ if q and q[0]:
+ return q[0]
+ return redir_url
+
+ description_original = video_description = re.sub(r'''(?x)
+ <a\s+
+ (?:[a-zA-Z-]+="[^"]*"\s+)*?
+ (?:title|href)="([^"]+)"\s+
+ (?:[a-zA-Z-]+="[^"]*"\s+)*?
+ class="[^"]*"[^>]*>
+ [^<]+\.{3}\s*
+ </a>
+ ''', replace_url, video_description)
+ video_description = clean_html(video_description)
+ else:
+ fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
+ if fd_mobj:
+ video_description = unescapeHTML(fd_mobj.group(1))
+ else:
+ video_description = ''
+
+ if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
+ if not self._downloader.params.get('noplaylist'):
+ entries = []
+ feed_ids = []
+ multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
+ for feed in multifeed_metadata_list.split(','):
+ # Unquote should take place before split on comma (,) since textual
+ # fields may contain comma as well (see
+ # https://github.com/rg3/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+ })
+ feed_ids.append(feed_data['id'][0])
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(entries, video_id, video_title, video_description)
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ if view_count is None:
+ view_count = extract_view_count(video_info)
+
+ # Check for "rental" videos
+ if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
+ raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
+
+ def _extract_filesize(media_url):
+ return int_or_none(self._search_regex(
+ r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
+
+ if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
+ self.report_rtmp_download()
+ formats = [{
+ 'format_id': '_rtmp',
+ 'protocol': 'rtmp',
+ 'url': video_info['conn'][0],
+ 'player_url': player_url,
+ }]
+ elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+ encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
+ if 'rtmpe%3Dyes' in encoded_url_map:
+ raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
+ formats_spec = {}
+ fmt_list = video_info.get('fmt_list', [''])[0]
+ if fmt_list:
+ for fmt in fmt_list.split(','):
+ spec = fmt.split('/')
+ if len(spec) > 1:
+ width_height = spec[1].split('x')
+ if len(width_height) == 2:
+ formats_spec[spec[0]] = {
+ 'resolution': spec[1],
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ }
+ q = qualities(['small', 'medium', 'hd720'])
+ formats = []
+ for url_data_str in encoded_url_map.split(','):
+ url_data = compat_parse_qs(url_data_str)
+ if 'itag' not in url_data or 'url' not in url_data:
+ continue
+ format_id = url_data['itag'][0]
+ url = url_data['url'][0]
+
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+ ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE,
+ embed_webpage if age_gate else video_webpage,
+ 'JS player URL (1)', default=None)
+ if not jsplayer_url_json and not age_gate:
+ # We need the embed website after all
+ if embed_webpage is None:
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE, embed_webpage, 'JS player URL')
+
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, 'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if self._downloader.params.get('verbose'):
+ if player_url is None:
+ player_version = 'unknown'
+ player_desc = 'unknown'
+ else:
+ if player_url.endswith('swf'):
+ player_version = self._search_regex(
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+ 'flash player', fatal=False)
+ player_desc = 'flash player %s' % player_version
+ else:
+ player_version = self._search_regex(
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+ r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
+ player_url,
+ 'html5 player', fatal=False)
+ player_desc = 'html5 player %s' % player_version
+
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen('{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ url += '&signature=' + signature
+ if 'ratebypass' not in url:
+ url += '&ratebypass=yes'
+
+ dct = {
+ 'format_id': format_id,
+ 'url': url,
+ 'player_url': player_url,
+ }
+ if format_id in self._formats:
+ dct.update(self._formats[format_id])
+ if format_id in formats_spec:
+ dct.update(formats_spec[format_id])
+
+ # Some itags are not included in DASH manifest thus corresponding formats will
+ # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
+ # Trying to extract metadata from url_encoded_fmt_stream_map entry.
+ mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
+ width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+
+ filesize = int_or_none(url_data.get(
+ 'clen', [None])[0]) or _extract_filesize(url)
+
+ quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+
+ more_fields = {
+ 'filesize': filesize,
+ 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+ 'width': width,
+ 'height': height,
+ 'fps': int_or_none(url_data.get('fps', [None])[0]),
+ 'format_note': quality,
+ 'quality': q(quality),
+ }
+ for key, value in more_fields.items():
+ if value:
+ dct[key] = value
+ type_ = url_data.get('type', [None])[0]
+ if type_:
+ type_split = type_.split(';')
+ kind_ext = type_split[0].split('/')
+ if len(kind_ext) == 2:
+ kind, _ = kind_ext
+ dct['ext'] = mimetype2ext(type_split[0])
+ if kind in ('audio', 'video'):
+ codecs = None
+ for mobj in re.finditer(
+ r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
+ if mobj.group('key') == 'codecs':
+ codecs = mobj.group('val')
+ break
+ if codecs:
+ dct.update(parse_codecs(codecs))
+ if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ dct['downloader_options'] = {
+ # Youtube throttles chunks >~10M
+ 'http_chunk_size': 10485760,
+ }
+ formats.append(dct)
+ elif video_info.get('hlsvp'):
+ manifest_url = video_info['hlsvp'][0]
+ formats = []
+ m3u8_formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', fatal=False)
+ for a_format in m3u8_formats:
+ itag = self._search_regex(
+ r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+ if itag:
+ a_format['format_id'] = itag
+ if itag in self._formats:
+ dct = self._formats[itag].copy()
+ dct.update(a_format)
+ a_format = dct
+ a_format['player_url'] = player_url
+ # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
+ a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+ formats.append(a_format)
+ else:
+ error_message = clean_html(video_info.get('reason', [None])[0])
+ if not error_message:
+ error_message = extract_unavailable_message()
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+ raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+
+ # uploader
+ video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+ if video_uploader:
+ video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
+ else:
+ self._downloader.report_warning('unable to extract uploader name')
+
+ # uploader_id
+ video_uploader_id = None
+ video_uploader_url = None
+ mobj = re.search(
+ r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+ video_webpage)
+ if mobj is not None:
+ video_uploader_id = mobj.group('uploader_id')
+ video_uploader_url = mobj.group('uploader_url')
+ else:
+ self._downloader.report_warning('unable to extract uploader nickname')
+
+ # thumbnail image
+ # We try first to get a high quality image:
+ m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+ video_webpage, re.DOTALL)
+ if m_thumb is not None:
+ video_thumbnail = m_thumb.group(1)
+ elif 'thumbnail_url' not in video_info:
+ self._downloader.report_warning('unable to extract video thumbnail')
+ video_thumbnail = None
+ else: # don't panic if we can't find it
+ video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+
+ # upload date
+ upload_date = self._html_search_meta(
+ 'datePublished', video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = self._search_regex(
+ [r'(?s)id="eow-date.*?>(.*?)</span>',
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
+ video_webpage, 'upload date', default=None)
+ upload_date = unified_strdate(upload_date)
+
+ video_license = self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+ video_webpage, 'license', default=None)
+
+ m_music = re.search(
+ r'''(?x)
+ <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+ <ul[^>]*>\s*
+ <li>(?P<title>.+?)
+ by (?P<creator>.+?)
+ (?:
+ \(.+?\)|
+ <a[^>]*
+ (?:
+ \bhref=["\']/red[^>]*>| # drop possible
+ >\s*Listen ad-free with YouTube Red # YouTube Red ad
+ )
+ .*?
+ )?</li
+ ''',
+ video_webpage)
+ if m_music:
+ video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+ video_creator = clean_html(m_music.group('creator'))
+ else:
+ video_alt_title = video_creator = None
+
+ def extract_meta(field):
+ return self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
+ video_webpage, field, default=None)
+
+ track = extract_meta('Song')
+ artist = extract_meta('Artist')
+
+ m_episode = re.search(
+ r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
+ video_webpage)
+ if m_episode:
+ series = m_episode.group('series')
+ season_number = int(m_episode.group('season'))
+ episode_number = int(m_episode.group('episode'))
+ else:
+ series = season_number = episode_number = None
+
+ m_cat_container = self._search_regex(
+ r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
+ video_webpage, 'categories', default=None)
+ if m_cat_container:
+ category = self._html_search_regex(
+ r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+ default=None)
+ video_categories = None if category is None else [category]
+ else:
+ video_categories = None
+
+ video_tags = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+
+ def _extract_count(count_name):
+ return str_to_int(self._search_regex(
+ r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+ % re.escape(count_name),
+ video_webpage, count_name, default=None))
+
+ like_count = _extract_count('like')
+ dislike_count = _extract_count('dislike')
+
+ # subtitles
+ video_subtitles = self.extract_subtitles(video_id, video_webpage)
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+
+ video_duration = try_get(
+ video_info, lambda x: int_or_none(x['length_seconds'][0]))
+ if not video_duration:
+ video_duration = parse_duration(self._html_search_meta(
+ 'duration', video_webpage, 'video duration'))
+
+ # annotations
+ video_annotations = None
+ if self._downloader.params.get('writeannotations', False):
+ video_annotations = self._extract_annotations(video_id)
+
+ chapters = self._extract_chapters(description_original, video_duration)
+
+ # Look for the DASH manifest
+ if self._downloader.params.get('youtube_include_dash_manifest', True):
+ dash_mpd_fatal = True
+ for mpd_url in dash_mpds:
+ dash_formats = {}
+ try:
+ def decrypt_sig(mobj):
+ s = mobj.group(1)
+ dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+ return '/signature/%s' % dec_s
+
+ mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
+
+ for df in self._extract_mpd_formats(
+ mpd_url, video_id, fatal=dash_mpd_fatal,
+ formats_dict=self._formats):
+ if not df.get('filesize'):
+ df['filesize'] = _extract_filesize(df['url'])
+ # Do not overwrite DASH format found in some previous DASH manifest
+ if df['format_id'] not in dash_formats:
+ dash_formats[df['format_id']] = df
+ # Additional DASH manifests may end up in HTTP Error 403 therefore
+ # allow them to fail without bug report message if we already have
+ # some DASH manifest succeeded. This is temporary workaround to reduce
+ # burst of bug reports until we figure out the reason and whether it
+ # can be fixed at all.
+ dash_mpd_fatal = False
+ except (ExtractorError, KeyError) as e:
+ self.report_warning(
+ 'Skipping DASH manifest: %r' % e, video_id)
+ if dash_formats:
+ # Remove the formats we found through non-DASH, they
+ # contain less info and it can be wrong, because we use
+ # fixed values (for example the resolution). See
+ # https://github.com/rg3/youtube-dl/issues/5774 for an
+ # example.
+ formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
+ formats.extend(dash_formats.values())
+
+ # Check for malformed aspect ratio
+ stretched_m = re.search(
+ r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
+ video_webpage)
+ if stretched_m:
+ w = float(stretched_m.group('w'))
+ h = float(stretched_m.group('h'))
+ # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
+ # We will only process correct ratios.
+ if w > 0 and h > 0:
+ ratio = w / h
+ for f in formats:
+ if f.get('vcodec') != 'none':
+ f['stretched_ratio'] = ratio
+
+ self._sort_formats(formats)
+
+ self.mark_watched(video_id, video_info)
+
+ return {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'uploader_id': video_uploader_id,
+ 'uploader_url': video_uploader_url,
+ 'upload_date': upload_date,
+ 'license': video_license,
+ 'creator': video_creator or artist,
+ 'title': video_title,
+ 'alt_title': video_alt_title or track,
+ 'thumbnail': video_thumbnail,
+ 'description': video_description,
+ 'categories': video_categories,
+ 'tags': video_tags,
+ 'subtitles': video_subtitles,
+ 'automatic_captions': automatic_captions,
+ 'duration': video_duration,
+ 'age_limit': 18 if age_gate else 0,
+ 'annotations': video_annotations,
+ 'chapters': chapters,
+ 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
+ 'formats': formats,
+ 'is_live': is_live,
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'track': track,
+ 'artist': artist,
+ 'related_vids': related_vids,
+ }
+
+
+class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
+ IE_DESC = 'YouTube.com playlists'
+ _VALID_URL = r"""(?x)(?:
+ (?:https?://)?
+ (?:\w+\.)?
+ (?:
+ youtube\.com/
+ (?:
+ (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
+ \? (?:.*?[&;])*? (?:p|a|list)=
+ | p/
+ )|
+ youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
+ )
+ (
+ (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
+ # Top tracks, they can also include dots
+ |(?:MC)[\w\.]*
+ )
+ .*
+ |
+ (%(playlist_id)s)
+ )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
+ IE_NAME = 'youtube:playlist'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ 'info_dict': {
+ 'title': 'ytdl test PL',
+ 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+ 'info_dict': {
+ 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+ 'title': 'YDL_Empty_List',
+ },
+ 'playlist_count': 0,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
+ 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'info_dict': {
+ 'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ },
+ 'playlist_count': 95,
+ }, {
+ 'note': 'issue #673',
+ 'url': 'PLBB231211A4F62143',
+ 'info_dict': {
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
+ },
+ 'playlist_mincount': 26,
+ }, {
+ 'note': 'Large playlist',
+ 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'info_dict': {
+ 'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ },
+ 'playlist_mincount': 799,
+ }, {
+ 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'info_dict': {
+ 'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'embedded',
+ 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ }
+ }, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 485,
+ 'info_dict': {
+ 'title': '2017 華語最新單曲 (2/24更新)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ }
+ }, {
+ 'note': 'Embedded SWF player',
+ 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA7',
+ 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
+ }
+ }, {
+ 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+ 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+ 'info_dict': {
+ 'title': 'Uploads from Interstellar Movie',
+ 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+ },
+ 'playlist_mincount': 21,
+ }, {
+ # Playlist URL that does not actually serve a playlist
+ 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+ 'info_dict': {
+ 'id': 'FqZTN594JQw',
+ 'ext': 'webm',
+ 'title': "Smiley's People 01 detective, Adventure Series, Action",
+ 'uploader': 'STREEM',
+ 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+ 'upload_date': '20150526',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+ 'categories': ['People & Blogs'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': 'backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'upload_date': '20161008',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+ 'only_matching': True,
+ }, {
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _extract_mix(self, playlist_id):
+ # The mixes are generated from a single video
+ # the id of the playlist is just 'RD' + video_id
+ ids = []
+ last_id = playlist_id[-11:]
+ for n in itertools.count(1):
+ url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ webpage = self._download_webpage(
+ url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+ new_ids = orderedSet(re.findall(
+ r'''(?xs)data-video-username=".*?".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+ webpage))
+ # Fetch new pages until all the videos are repeated, it seems that
+ # there are always 51 unique videos.
+ new_ids = [_id for _id in new_ids if _id not in ids]
+ if not new_ids:
+ break
+ ids.extend(new_ids)
+ last_id = ids[-1]
+
+ url_results = self._ids_to_results(ids)
+
+ search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+ title_span = (
+ search_title('playlist-title') or
+ search_title('title long-title') or
+ search_title('title'))
+ title = clean_html(title_span)
+
+ return self.playlist_result(url_results, playlist_id, title)
+
+ def _extract_playlist(self, playlist_id):
+ url = self._TEMPLATE_URL % playlist_id
+ page = self._download_webpage(url, playlist_id)
+
+ # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
+ for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
+ match = match.strip()
+ # Check if the playlist exists or is private
+ mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
+ if mobj:
+ reason = mobj.group('reason')
+ message = 'This playlist %s' % reason
+ if 'private' in reason:
+ message += ', use --username or --netrc to access it'
+ message += '.'
+ raise ExtractorError(message, expected=True)
+ elif re.match(r'[^<]*Invalid parameters[^<]*', match):
+ raise ExtractorError(
+ 'Invalid parameters. Maybe URL is incorrect.',
+ expected=True)
+ elif re.match(r'[^<]*Choose your language[^<]*', match):
+ continue
+ else:
+ self.report_warning('Youtube gives an alert message: ' + match)
+
+ playlist_title = self._html_search_regex(
+ r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
+ page, 'title', default=None)
+
+ _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
+ uploader = self._search_regex(
+ r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
+ page, 'uploader', default=None)
+ mobj = re.search(
+ r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
+ page)
+ if mobj:
+ uploader_id = mobj.group('uploader_id')
+ uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
+ else:
+ uploader_id = uploader_url = None
+
+ has_videos = True
+
+ if not playlist_title:
+ try:
+ # Some playlist URLs don't actually serve a playlist (e.g.
+ # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
+ next(self._entries(page, playlist_id))
+ except StopIteration:
+ has_videos = False
+
+ playlist = self.playlist_result(
+ self._entries(page, playlist_id), playlist_id, playlist_title)
+ playlist.update({
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ })
+
+ return has_videos, playlist
+
+ def _check_download_just_video(self, url, playlist_id):
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = query_dict.get('v', [None])[0] or self._search_regex(
+ r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
+ 'video id', default=None)
+ if video_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ return video_id, None
+ return None, None
+
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ video_id, video = self._check_download_just_video(url, playlist_id)
+ if video:
+ return video
+
+ if playlist_id.startswith(('RD', 'UL', 'PU')):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ has_videos, playlist = self._extract_playlist(playlist_id)
+ if has_videos or not video_id:
+ return playlist
+
+ # Some playlist URLs don't actually serve a playlist (see
+ # https://github.com/rg3/youtube-dl/issues/10537).
+ # Fallback to plain video extraction if there is a video id
+ # along with playlist id.
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+
+
+class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
+ IE_DESC = 'YouTube.com channels'
+ _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
+ _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
+ IE_NAME = 'youtube:channel'
+ _TESTS = [{
+ 'note': 'paginated channel',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'playlist_mincount': 91,
+ 'info_dict': {
+ 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'Uploads from lex will',
+ }
+ }, {
+ 'note': 'Age restricted channel',
+ # from https://www.youtube.com/user/DeusExOfficial
+ 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
+ 'playlist_mincount': 64,
+ 'info_dict': {
+ 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
+ 'title': 'Uploads from Deus Ex',
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
+ else super(YoutubeChannelIE, cls).suitable(url))
+
+ def _build_template_url(self, url, channel_id):
+ return self._TEMPLATE_URL % channel_id
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ url = self._build_template_url(url, channel_id)
+
+ # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
+ # Workaround by extracting as a playlist if managed to obtain channel playlist URL
+ # otherwise fallback on channel by page extraction
+ channel_page = self._download_webpage(
+ url + '?view=57', channel_id,
+ 'Downloading channel page', fatal=False)
+ if channel_page is False:
+ channel_playlist_id = False
+ else:
+ channel_playlist_id = self._html_search_meta(
+ 'channelId', channel_page, 'channel id', default=None)
+ if not channel_playlist_id:
+ channel_url = self._html_search_meta(
+ ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
+ channel_page, 'channel url', default=None)
+ if channel_url:
+ channel_playlist_id = self._search_regex(
+ r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
+ channel_url, 'channel id', default=None)
+ if channel_playlist_id and channel_playlist_id.startswith('UC'):
+ playlist_id = 'UU' + channel_playlist_id[2:]
+ return self.url_result(
+ compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
+
+ channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
+ autogenerated = re.search(r'''(?x)
+ class="[^"]*?(?:
+ channel-header-autogenerated-label|
+ yt-channel-title-autogenerated
+ )[^"]*"''', channel_page) is not None
+
+ if autogenerated:
+ # The videos are contained in a single page
+ # the ajax pages can't be used, they are empty
+ entries = [
+ self.url_result(
+ video_id, 'Youtube', video_id=video_id,
+ video_title=video_title)
+ for video_id, video_title in self.extract_videos_from_page(channel_page)]
+ return self.playlist_result(entries, channel_id)
+
+ try:
+ next(self._entries(channel_page, channel_id))
+ except StopIteration:
+ alert_message = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
+ channel_page, 'alert', default=None, group='alert')
+ if alert_message:
+ raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
+
+ return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
+
+
+class YoutubeUserIE(YoutubeChannelIE):
+ IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
+ IE_NAME = 'youtube:user'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
+ 'playlist_mincount': 320,
+ 'info_dict': {
+ 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
+ 'title': 'Uploads from The Linux Foundation',
+ }
+ }, {
+ # Only available via https://www.youtube.com/c/12minuteathlete/videos
+ # but not https://www.youtube.com/user/12minuteathlete/videos
+ 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
+ 'playlist_mincount': 249,
+ 'info_dict': {
+ 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
+ 'title': 'Uploads from 12 Minute Athlete',
+ }
+ }, {
+ 'url': 'ytuser:phihag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/gametrailers',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/gametrailers',
+ 'only_matching': True,
+ }, {
+ # This channel is not available, geo restricted to JP
+ 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ # Don't return True if the url can be extracted with other youtube
+ # extractor, the regex would is too permissive and it would match.
+ other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
+ if any(ie.suitable(url) for ie in other_yt_ies):
+ return False
+ else:
+ return super(YoutubeUserIE, cls).suitable(url)
+
+ def _build_template_url(self, url, channel_id):
+ mobj = re.match(self._VALID_URL, url)
+ return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
+
+
+class YoutubeLiveIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com live streams'
+ _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
+ IE_NAME = 'youtube:live'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
+ 'info_dict': {
+ 'id': 'a48o2S1cPoo',
+ 'ext': 'mp4',
+ 'title': 'The Young Turks - Live Main Show',
+ 'uploader': 'The Young Turks',
+ 'uploader_id': 'TheYoungTurks',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+ 'upload_date': '20150715',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+ 'categories': ['News & Politics'],
+ 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/TheYoungTurks/live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
+ base_url = mobj.group('base_url')
+ webpage = self._download_webpage(url, channel_id, fatal=False)
+ if webpage:
+ page_type = self._og_search_property(
+ 'type', webpage, 'page type', default='')
+ video_id = self._html_search_meta(
+ 'videoId', webpage, 'video id', default=None)
+ if page_type.startswith('video') and video_id and re.match(
+ r'^[0-9A-Za-z_-]{11}$', video_id):
+ return self.url_result(video_id, YoutubeIE.ie_key())
+ return self.url_result(base_url)
+
+
+class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
+ IE_DESC = 'YouTube.com user/channel playlists'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
+ IE_NAME = 'youtube:playlists'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'ThirstForScience',
+ 'title': 'Thirst for Science',
+ },
+ }, {
+ # with "Load more" button
+ 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+ 'playlist_mincount': 70,
+ 'info_dict': {
+ 'id': 'igorkle1',
+ 'title': 'Игорь Клейнер',
+ },
+ }, {
+ 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
+ 'playlist_mincount': 17,
+ 'info_dict': {
+ 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
+ 'title': 'Chem Player',
+ },
+ }]
+
+
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
+ IE_DESC = 'YouTube.com searches'
+ # there doesn't appear to be a real limit, for example if you search for
+ # 'python' you get more than 8.000.000 results
+ _MAX_RESULTS = float('inf')
+ IE_NAME = 'youtube:search'
+ _SEARCH_KEY = 'ytsearch'
+ _EXTRA_QUERY_ARGS = {}
+ _TESTS = []
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+
+ videos = []
+ limit = n
+
+ url_query = {
+ 'search_query': query.encode('utf-8'),
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
+ for pagenum in itertools.count(1):
+ data = self._download_json(
+ result_url, video_id='query "%s"' % query,
+ note='Downloading page %s' % pagenum,
+ errnote='Unable to download API page',
+ query={'spf': 'navigate'})
+ html_content = data[1]['body']['content']
+
+ if 'class="search-message' in html_content:
+ raise ExtractorError(
+ '[youtube] No video results', expected=True)
+
+ new_videos = list(self._process_page(html_content))
+ videos += new_videos
+ if not new_videos or len(videos) > limit:
+ break
+ next_link = self._html_search_regex(
+ r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+ html_content, 'next link', default=None)
+ if next_link is None:
+ break
+ result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
+
+ if len(videos) > n:
+ videos = videos[:n]
+ return self.playlist_result(videos, query)
+
+
+class YoutubeSearchDateIE(YoutubeSearchIE):
+ IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
+ _SEARCH_KEY = 'ytsearchdate'
+ IE_DESC = 'YouTube.com searches, newest videos first'
+ _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
+
+
+class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
+ IE_DESC = 'YouTube.com search URLs'
+ IE_NAME = 'youtube:search_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'title': 'youtube-dl test video',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_urllib_parse_unquote_plus(mobj.group('query'))
+ webpage = self._download_webpage(url, query)
+ return self.playlist_result(self._process_page(webpage), playlist_title=query)
+
+
+class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
+ IE_DESC = 'YouTube.com (multi-season) shows'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
+ IE_NAME = 'youtube:show'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/show/airdisasters',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'airdisasters',
+ 'title': 'Air Disasters',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return super(YoutubeShowIE, self)._real_extract(
+ 'https://www.youtube.com/show/%s/playlists' % playlist_id)
+
+
+class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
+ """
+ Base class for feed extractors
+ Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+ """
+ _LOGIN_REQUIRED = True
+
+ @property
+ def IE_NAME(self):
+ return 'youtube:%s' % self._FEED_NAME
+
+ def _real_initialize(self):
+ self._login()
+
+ def _entries(self, page):
+ # The extraction process is the same as for playlists, but the regex
+ # for the video ids doesn't contain an index
+ ids = []
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+
+ # 'recommended' feed has infinite 'load more' and each new portion spins
+ # the same videos in (sometimes) slightly different order, so we'll check
+ # for unicity and break when portion has no new videos
+ new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
+ if not new_ids:
+ break
+
+ ids.extend(new_ids)
+
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ more_widget_html = more['load_more_widget_html']
+
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
+ return self.playlist_result(
+ self._entries(page), playlist_title=self._PLAYLIST_TITLE)
+
+
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
+ IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/playlist?list=WL',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ _, video = self._check_download_just_video(url, 'WL')
+ if video:
+ return video
+ _, playlist = self._extract_playlist('WL')
+ return playlist
+
+
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+ IE_NAME = 'youtube:favorites'
+ IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
+ _LOGIN_REQUIRED = True
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
+ playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
+ return self.url_result(playlist_id, 'YoutubePlaylist')
+
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+ _FEED_NAME = 'recommended'
+ _PLAYLIST_TITLE = 'Youtube Recommended videos'
+
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+ _FEED_NAME = 'subscriptions'
+ _PLAYLIST_TITLE = 'Youtube Subscriptions'
+
+
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
+ _FEED_NAME = 'history'
+ _PLAYLIST_TITLE = 'Youtube History'
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_url'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:https?://)?
+ (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
+ (?:watch\?(?:
+ feature=[a-z_]+|
+ annotation_id=annotation_[^&]+|
+ x-yt-cl=[0-9]+|
+ hl=[^&]*|
+ t=[0-9]+
+ )?
+ |
+ attribution_link\?a=[^&]+
+ )
+ $
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?feature=foo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?hl=en-GB',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?t=2372',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'Did you forget to quote the URL? Remember that & is a meta '
+ 'character in most shells, so you want to put the URL in quotes, '
+ 'like youtube-dl '
+ '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+ ' or simply youtube-dl BaW_jenozKc .',
+ expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ expected=True)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
new file mode 100644
index 0000000..7bda596
--- /dev/null
+++ b/youtube_dl/jsinterp.py
@@ -0,0 +1,262 @@
+from __future__ import unicode_literals
+
+import json
+import operator
+import re
+
+from .utils import (
+ ExtractorError,
+ remove_quotes,
+)
+
+_OPERATORS = [
+ ('|', operator.or_),
+ ('^', operator.xor),
+ ('&', operator.and_),
+ ('>>', operator.rshift),
+ ('<<', operator.lshift),
+ ('-', operator.sub),
+ ('+', operator.add),
+ ('%', operator.mod),
+ ('/', operator.truediv),
+ ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+
+
+class JSInterpreter(object):
+ def __init__(self, code, objects=None):
+ if objects is None:
+ objects = {}
+ self.code = code
+ self._functions = {}
+ self._objects = objects
+
+ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
+ if allow_recursion < 0:
+ raise ExtractorError('Recursion limit reached')
+
+ should_abort = False
+ stmt = stmt.lstrip()
+ stmt_m = re.match(r'var\s', stmt)
+ if stmt_m:
+ expr = stmt[len(stmt_m.group(0)):]
+ else:
+ return_m = re.match(r'return(?:\s+|$)', stmt)
+ if return_m:
+ expr = stmt[len(return_m.group(0)):]
+ should_abort = True
+ else:
+ # Try interpreting it as an expression
+ expr = stmt
+
+ v = self.interpret_expression(expr, local_vars, allow_recursion)
+ return v, should_abort
+
+ def interpret_expression(self, expr, local_vars, allow_recursion):
+ expr = expr.strip()
+ if expr == '': # Empty expression
+ return None
+
+ if expr.startswith('('):
+ parens_count = 0
+ for m in re.finditer(r'[()]', expr):
+ if m.group(0) == '(':
+ parens_count += 1
+ else:
+ parens_count -= 1
+ if parens_count == 0:
+ sub_expr = expr[1:m.start()]
+ sub_result = self.interpret_expression(
+ sub_expr, local_vars, allow_recursion)
+ remaining_expr = expr[m.end():].strip()
+ if not remaining_expr:
+ return sub_result
+ else:
+ expr = json.dumps(sub_result) + remaining_expr
+ break
+ else:
+ raise ExtractorError('Premature end of parens in %r' % expr)
+
+ for op, opfunc in _ASSIGN_OPERATORS:
+ m = re.match(r'''(?x)
+ (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+ \s*%s
+ (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+ if not m:
+ continue
+ right_val = self.interpret_expression(
+ m.group('expr'), local_vars, allow_recursion - 1)
+
+ if m.groupdict().get('index'):
+ lvar = local_vars[m.group('out')]
+ idx = self.interpret_expression(
+ m.group('index'), local_vars, allow_recursion)
+ assert isinstance(idx, int)
+ cur = lvar[idx]
+ val = opfunc(cur, right_val)
+ lvar[idx] = val
+ return val
+ else:
+ cur = local_vars.get(m.group('out'))
+ val = opfunc(cur, right_val)
+ local_vars[m.group('out')] = val
+ return val
+
+ if expr.isdigit():
+ return int(expr)
+
+ var_m = re.match(
+ r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+ expr)
+ if var_m:
+ return local_vars[var_m.group('name')]
+
+ try:
+ return json.loads(expr)
+ except ValueError:
+ pass
+
+ m = re.match(
+ r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
+ if m:
+ val = local_vars[m.group('in')]
+ idx = self.interpret_expression(
+ m.group('idx'), local_vars, allow_recursion - 1)
+ return val[idx]
+
+ m = re.match(
+ r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
+ expr)
+ if m:
+ variable = m.group('var')
+ member = remove_quotes(m.group('member') or m.group('member2'))
+ arg_str = m.group('args')
+
+ if variable in local_vars:
+ obj = local_vars[variable]
+ else:
+ if variable not in self._objects:
+ self._objects[variable] = self.extract_object(variable)
+ obj = self._objects[variable]
+
+ if arg_str is None:
+ # Member access
+ if member == 'length':
+ return len(obj)
+ return obj[member]
+
+ assert expr.endswith(')')
+ # Function call
+ if arg_str == '':
+ argvals = tuple()
+ else:
+ argvals = tuple([
+ self.interpret_expression(v, local_vars, allow_recursion)
+ for v in arg_str.split(',')])
+
+ if member == 'split':
+ assert argvals == ('',)
+ return list(obj)
+ if member == 'join':
+ assert len(argvals) == 1
+ return argvals[0].join(obj)
+ if member == 'reverse':
+ assert len(argvals) == 0
+ obj.reverse()
+ return obj
+ if member == 'slice':
+ assert len(argvals) == 1
+ return obj[argvals[0]:]
+ if member == 'splice':
+ assert isinstance(obj, list)
+ index, howMany = argvals
+ res = []
+ for i in range(index, min(index + howMany, len(obj))):
+ res.append(obj.pop(index))
+ return res
+
+ return obj[member](argvals)
+
+ for op, opfunc in _OPERATORS:
+ m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+ if not m:
+ continue
+ x, abort = self.interpret_statement(
+ m.group('x'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature left-side return of %s in %r' % (op, expr))
+ y, abort = self.interpret_statement(
+ m.group('y'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature right-side return of %s in %r' % (op, expr))
+ return opfunc(x, y)
+
+ m = re.match(
+ r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
+ if m:
+ fname = m.group('func')
+ argvals = tuple([
+ int(v) if v.isdigit() else local_vars[v]
+ for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
+ if fname not in self._functions:
+ self._functions[fname] = self.extract_function(fname)
+ return self._functions[fname](argvals)
+
+ raise ExtractorError('Unsupported JS expression %r' % expr)
+
+ def extract_object(self, objname):
+ _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
+ obj = {}
+ obj_m = re.search(
+ r'''(?x)
+ (?<!this\.)%s\s*=\s*{\s*
+ (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
+ }\s*;
+ ''' % (re.escape(objname), _FUNC_NAME_RE),
+ self.code)
+ fields = obj_m.group('fields')
+ # Currently, it only supports function definitions
+ fields_m = re.finditer(
+ r'''(?x)
+ (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
+ ''' % _FUNC_NAME_RE,
+ fields)
+ for f in fields_m:
+ argnames = f.group('args').split(',')
+ obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
+
+ return obj
+
+ def extract_function(self, funcname):
+ func_m = re.search(
+ r'''(?x)
+ (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+ \((?P<args>[^)]*)\)\s*
+ \{(?P<code>[^}]+)\}''' % (
+ re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+ self.code)
+ if func_m is None:
+ raise ExtractorError('Could not find JS function %r' % funcname)
+ argnames = func_m.group('args').split(',')
+
+ return self.build_function(argnames, func_m.group('code'))
+
+ def call_function(self, funcname, *args):
+ f = self.extract_function(funcname)
+ return f(args)
+
+ def build_function(self, argnames, code):
+ def resf(args):
+ local_vars = dict(zip(argnames, args))
+ for stmt in code.split(';'):
+ res, abort = self.interpret_statement(stmt, local_vars)
+ if abort:
+ break
+ return res
+ return resf
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
new file mode 100644
index 0000000..e7d8e89
--- /dev/null
+++ b/youtube_dl/options.py
@@ -0,0 +1,916 @@
+from __future__ import unicode_literals
+
+import os.path
+import optparse
+import re
+import sys
+
+from .downloader.external import list_external_downloaders
+from .compat import (
+ compat_expanduser,
+ compat_get_terminal_size,
+ compat_getenv,
+ compat_kwargs,
+ compat_shlex_split,
+)
+from .utils import (
+ preferredencoding,
+ write_string,
+)
+from .version import __version__
+
+
+def _hide_login_info(opts):
+ PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+ eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+ def _scrub_eq(o):
+ m = eqre.match(o)
+ if m:
+ return m.group('key') + '=PRIVATE'
+ else:
+ return o
+
+ opts = list(map(_scrub_eq, opts))
+ for idx, opt in enumerate(opts):
+ if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+ opts[idx + 1] = 'PRIVATE'
+ return opts
+
+
+def parseOpts(overrideArguments=None):
+ def _readOptions(filename_bytes, default=[]):
+ try:
+ optionf = open(filename_bytes)
+ except IOError:
+ return default # silently skip if file is not present
+ try:
+ # FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ contents = optionf.read()
+ if sys.version_info < (3,):
+ contents = contents.decode(preferredencoding())
+ res = compat_shlex_split(contents, comments=True)
+ finally:
+ optionf.close()
+ return res
+
+ def _readUserConf():
+ xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
+ if xdg_config_home:
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
+ else:
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
+ userConf = _readOptions(userConfFile, None)
+
+ if userConf is None:
+ appdata_dir = compat_getenv('appdata')
+ if appdata_dir:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dl', 'config'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = []
+
+ return userConf
+
+ def _format_option_string(option):
+ ''' ('-o', '--option') -> -o, --format METAVAR'''
+
+ opts = []
+
+ if option._short_opts:
+ opts.append(option._short_opts[0])
+ if option._long_opts:
+ opts.append(option._long_opts[0])
+ if len(opts) > 1:
+ opts.insert(1, ', ')
+
+ if option.takes_value():
+ opts.append(' %s' % option.metavar)
+
+ return ''.join(opts)
+
+ def _comma_separated_values_options_callback(option, opt_str, value, parser):
+ setattr(parser.values, option.dest, value.split(','))
+
+ # No need to wrap help messages if we're on a wide console
+ columns = compat_get_terminal_size().columns
+ max_width = columns if columns else 80
+ max_help_position = 80
+
+ fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
+ fmt.format_option_strings = _format_option_string
+
+ kw = {
+ 'version': __version__,
+ 'formatter': fmt,
+ 'usage': '%prog [OPTIONS] URL [URL...]',
+ 'conflict_handler': 'resolve',
+ }
+
+ parser = optparse.OptionParser(**compat_kwargs(kw))
+
+ general = optparse.OptionGroup(parser, 'General Options')
+ general.add_option(
+ '-h', '--help',
+ action='help',
+ help='Print this help text and exit')
+ general.add_option(
+ '-v', '--version',
+ action='version',
+ help='Print program version and exit')
+ general.add_option(
+ '-U', '--update',
+ action='store_true', dest='update_self',
+ help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+ general.add_option(
+ '-i', '--ignore-errors',
+ action='store_true', dest='ignoreerrors', default=False,
+ help='Continue on download errors, for example to skip unavailable videos in a playlist')
+ general.add_option(
+ '--abort-on-error',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+ general.add_option(
+ '--dump-user-agent',
+ action='store_true', dest='dump_user_agent', default=False,
+ help='Display the current browser identification')
+ general.add_option(
+ '--list-extractors',
+ action='store_true', dest='list_extractors', default=False,
+ help='List all supported extractors')
+ general.add_option(
+ '--extractor-descriptions',
+ action='store_true', dest='list_extractor_descriptions', default=False,
+ help='Output descriptions of all supported extractors')
+ general.add_option(
+ '--force-generic-extractor',
+ action='store_true', dest='force_generic_extractor', default=False,
+ help='Force extraction to use the generic extractor')
+ general.add_option(
+ '--default-search',
+ dest='default_search', metavar='PREFIX',
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ general.add_option(
+ '--ignore-config',
+ action='store_true',
+ help='Do not read configuration files. '
+ 'When given in the global configuration file /etc/youtube-dl.conf: '
+ 'Do not read the user configuration in ~/.config/youtube-dl/config '
+ '(%APPDATA%/youtube-dl/config.txt on Windows)')
+ general.add_option(
+ '--config-location',
+ dest='config_location', metavar='PATH',
+ help='Location of the configuration file; either the path to the config or its containing directory.')
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist',
+ default=False,
+ help='Do not extract the videos of a playlist, only list them.')
+ general.add_option(
+ '--mark-watched',
+ action='store_true', dest='mark_watched', default=False,
+ help='Mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-mark-watched',
+ action='store_false', dest='mark_watched', default=False,
+ help='Do not mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-color', '--no-colors',
+ action='store_true', dest='no_color',
+ default=False,
+ help='Do not emit color codes in output')
+
+ network = optparse.OptionGroup(parser, 'Network Options')
+ network.add_option(
+ '--proxy', dest='proxy',
+ default=None, metavar='URL',
+ help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
+ 'SOCKS proxy, specify a proper scheme. For example '
+ 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
+ 'for direct connection')
+ network.add_option(
+ '--socket-timeout',
+ dest='socket_timeout', type=float, default=None, metavar='SECONDS',
+ help='Time to wait before giving up, in seconds')
+ network.add_option(
+ '--source-address',
+ metavar='IP', dest='source_address', default=None,
+ help='Client-side IP address to bind to',
+ )
+ network.add_option(
+ '-4', '--force-ipv4',
+ action='store_const', const='0.0.0.0', dest='source_address',
+ help='Make all connections via IPv4',
+ )
+ network.add_option(
+ '-6', '--force-ipv6',
+ action='store_const', const='::', dest='source_address',
+ help='Make all connections via IPv6',
+ )
+
+ geo = optparse.OptionGroup(parser, 'Geo Restriction')
+ geo.add_option(
+ '--geo-verification-proxy',
+ dest='geo_verification_proxy', default=None, metavar='URL',
+ help='Use this proxy to verify the IP address for some geo-restricted sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
+ geo.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--geo-bypass',
+ action='store_true', dest='geo_bypass', default=True,
+ help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--no-geo-bypass',
+ action='store_false', dest='geo_bypass', default=True,
+ help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--geo-bypass-country', metavar='CODE',
+ dest='geo_bypass_country', default=None,
+ help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK',
+ dest='geo_bypass_ip_block', default=None,
+ help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
+
+ selection = optparse.OptionGroup(parser, 'Video Selection')
+ selection.add_option(
+ '--playlist-start',
+ dest='playliststart', metavar='NUMBER', default=1, type=int,
+ help='Playlist video to start at (default is %default)')
+ selection.add_option(
+ '--playlist-end',
+ dest='playlistend', metavar='NUMBER', default=None, type=int,
+ help='Playlist video to end at (default is last)')
+ selection.add_option(
+ '--playlist-items',
+ dest='playlist_items', metavar='ITEM_SPEC', default=None,
+ help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+ selection.add_option(
+ '--match-title',
+ dest='matchtitle', metavar='REGEX',
+ help='Download only matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--reject-title',
+ dest='rejecttitle', metavar='REGEX',
+ help='Skip download for matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--max-downloads',
+ dest='max_downloads', metavar='NUMBER', type=int, default=None,
+ help='Abort after downloading NUMBER files')
+ selection.add_option(
+ '--min-filesize',
+ metavar='SIZE', dest='min_filesize', default=None,
+ help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--max-filesize',
+ metavar='SIZE', dest='max_filesize', default=None,
+ help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--date',
+ metavar='DATE', dest='date', default=None,
+ help='Download only videos uploaded in this date')
+ selection.add_option(
+ '--datebefore',
+ metavar='DATE', dest='datebefore', default=None,
+ help='Download only videos uploaded on or before this date (i.e. inclusive)')
+ selection.add_option(
+ '--dateafter',
+ metavar='DATE', dest='dateafter', default=None,
+ help='Download only videos uploaded on or after this date (i.e. inclusive)')
+ selection.add_option(
+ '--min-views',
+ metavar='COUNT', dest='min_views', default=None, type=int,
+ help='Do not download any videos with less than COUNT views')
+ selection.add_option(
+ '--max-views',
+ metavar='COUNT', dest='max_views', default=None, type=int,
+ help='Do not download any videos with more than COUNT views')
+ selection.add_option(
+ '--match-filter',
+ metavar='FILTER', dest='match_filter', default=None,
+ help=(
+ 'Generic video filter. '
+ 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
+ 'match if the key is present, '
+ '!key to check if the key is not present, '
+ 'key > NUMBER (like "comment_count > 12", also works with '
+ '>=, <, <=, !=, =) to compare against a number, '
+ 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
+ 'to match against a string literal '
+ 'and & to require multiple matches. '
+ 'Values which are not known are excluded unless you '
+ 'put a question mark (?) after the operator. '
+ 'For example, to only match videos that have been liked more than '
+ '100 times and disliked less than 50 times (or the dislike '
+ 'functionality is not available at the given service), but who '
+ 'also have a description, use --match-filter '
+ '"like_count > 100 & dislike_count <? 50 & description" .'
+ ))
+ selection.add_option(
+ '--no-playlist',
+ action='store_true', dest='noplaylist', default=False,
+ help='Download only the video, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist', default=False,
+ help='Download the playlist, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--age-limit',
+ metavar='YEARS', dest='age_limit', default=None, type=int,
+ help='Download only videos suitable for the given age')
+ selection.add_option(
+ '--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+ selection.add_option(
+ '--include-ads',
+ dest='include_ads', action='store_true',
+ help='Download advertisements as well (experimental)')
+
+ authentication = optparse.OptionGroup(parser, 'Authentication Options')
+ authentication.add_option(
+ '-u', '--username',
+ dest='username', metavar='USERNAME',
+ help='Login with this account ID')
+ authentication.add_option(
+ '-p', '--password',
+ dest='password', metavar='PASSWORD',
+ help='Account password. If this option is left out, youtube-dl will ask interactively.')
+ authentication.add_option(
+ '-2', '--twofactor',
+ dest='twofactor', metavar='TWOFACTOR',
+ help='Two-factor authentication code')
+ authentication.add_option(
+ '-n', '--netrc',
+ action='store_true', dest='usenetrc', default=False,
+ help='Use .netrc authentication data')
+ authentication.add_option(
+ '--video-password',
+ dest='videopassword', metavar='PASSWORD',
+ help='Video password (vimeo, smotri, youku)')
+
+ adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
+ adobe_pass.add_option(
+ '--ap-mso',
+ dest='ap_mso', metavar='MSO',
+ help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
+ adobe_pass.add_option(
+ '--ap-username',
+ dest='ap_username', metavar='USERNAME',
+ help='Multiple-system operator account login')
+ adobe_pass.add_option(
+ '--ap-password',
+ dest='ap_password', metavar='PASSWORD',
+ help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.')
+ adobe_pass.add_option(
+ '--ap-list-mso',
+ action='store_true', dest='ap_list_mso', default=False,
+ help='List all supported multiple-system operators')
+
+ video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ video_format.add_option(
+ '-f', '--format',
+ action='store', dest='format', metavar='FORMAT', default=None,
+ help='Video format code, see the "FORMAT SELECTION" for all the info')
+ video_format.add_option(
+ '--all-formats',
+ action='store_const', dest='format', const='all',
+ help='Download all available video formats')
+ video_format.add_option(
+ '--prefer-free-formats',
+ action='store_true', dest='prefer_free_formats', default=False,
+ help='Prefer free video formats unless a specific one is requested')
+ video_format.add_option(
+ '-F', '--list-formats',
+ action='store_true', dest='listformats',
+ help='List all available formats of requested videos')
+ video_format.add_option(
+ '--youtube-include-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--youtube-skip-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help='Do not download the DASH manifests and related data on YouTube videos')
+ video_format.add_option(
+ '--merge-output-format',
+ action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+ help=(
+ 'If a merge is required (e.g. bestvideo+bestaudio), '
+ 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
+ 'Ignored if no merge is required'))
+
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
+ subtitles.add_option(
+ '--write-sub', '--write-srt',
+ action='store_true', dest='writesubtitles', default=False,
+ help='Write subtitle file')
+ subtitles.add_option(
+ '--write-auto-sub', '--write-automatic-sub',
+ action='store_true', dest='writeautomaticsub', default=False,
+ help='Write automatically generated subtitle file (YouTube only)')
+ subtitles.add_option(
+ '--all-subs',
+ action='store_true', dest='allsubtitles', default=False,
+ help='Download all the available subtitles of the video')
+ subtitles.add_option(
+ '--list-subs',
+ action='store_true', dest='listsubtitles', default=False,
+ help='List all available subtitles for the video')
+ subtitles.add_option(
+ '--sub-format',
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
+ subtitles.add_option(
+ '--sub-lang', '--sub-langs', '--srt-lang',
+ action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+ default=[], callback=_comma_separated_values_options_callback,
+ help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
+
+ downloader = optparse.OptionGroup(parser, 'Download Options')
+ downloader.add_option(
+ '-r', '--limit-rate', '--rate-limit',
+ dest='ratelimit', metavar='RATE',
+ help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ downloader.add_option(
+ '-R', '--retries',
+ dest='retries', metavar='RETRIES', default=10,
+ help='Number of retries (default is %default), or "infinite".')
+ downloader.add_option(
+ '--fragment-retries',
+ dest='fragment_retries', metavar='RETRIES', default=10,
+ help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--skip-unavailable-fragments',
+ action='store_true', dest='skip_unavailable_fragments', default=True,
+ help='Skip unavailable fragments (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--abort-on-unavailable-fragment',
+ action='store_false', dest='skip_unavailable_fragments',
+ help='Abort downloading when some fragment is not available')
+ downloader.add_option(
+ '--keep-fragments',
+ action='store_true', dest='keep_fragments', default=False,
+ help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
+ downloader.add_option(
+ '--buffer-size',
+ dest='buffersize', metavar='SIZE', default='1024',
+ help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ downloader.add_option(
+ '--no-resize-buffer',
+ action='store_true', dest='noresizebuffer', default=False,
+ help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ downloader.add_option(
+ '--http-chunk-size',
+ dest='http_chunk_size', metavar='SIZE', default=None,
+ help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
+ downloader.add_option(
+ '--test',
+ action='store_true', dest='test', default=False,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--playlist-reverse',
+ action='store_true',
+ help='Download playlist videos in reverse order')
+ downloader.add_option(
+ '--playlist-random',
+ action='store_true',
+ help='Download playlist videos in random order')
+ downloader.add_option(
+ '--xattr-set-filesize',
+ dest='xattr_set_filesize', action='store_true',
+ help='Set file xattribute ytdl.filesize with expected file size')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true', default=None,
+ help='Use the native HLS downloader instead of ffmpeg')
+ downloader.add_option(
+ '--hls-prefer-ffmpeg',
+ dest='hls_prefer_native', action='store_false', default=None,
+ help='Use ffmpeg instead of the native HLS downloader')
+ downloader.add_option(
+ '--hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_true',
+ help='Use the mpegts container for HLS videos, allowing to play the '
+ 'video while downloading (some players may not be able to play it)')
+ downloader.add_option(
+ '--external-downloader',
+ dest='external_downloader', metavar='COMMAND',
+ help='Use the specified external downloader. '
+ 'Currently supports %s' % ','.join(list_external_downloaders()))
+ downloader.add_option(
+ '--external-downloader-args',
+ dest='external_downloader_args', metavar='ARGS',
+ help='Give these arguments to the external downloader')
+
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
+ workarounds.add_option(
+ '--encoding',
+ dest='encoding', metavar='ENCODING',
+ help='Force the specified encoding (experimental)')
+ workarounds.add_option(
+ '--no-check-certificate',
+ action='store_true', dest='no_check_certificate', default=False,
+ help='Suppress HTTPS certificate validation')
+ workarounds.add_option(
+ '--prefer-insecure',
+ '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+ workarounds.add_option(
+ '--user-agent',
+ metavar='UA', dest='user_agent',
+ help='Specify a custom user agent')
+ workarounds.add_option(
+ '--referer',
+ metavar='URL', dest='referer', default=None,
+ help='Specify a custom referer, use if the video access is restricted to one domain',
+ )
+ workarounds.add_option(
+ '--add-header',
+ metavar='FIELD:VALUE', dest='headers', action='append',
+ help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ )
+ workarounds.add_option(
+ '--bidi-workaround',
+ dest='bidi_workaround', action='store_true',
+ help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+ workarounds.add_option(
+ '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
+ dest='sleep_interval', type=float,
+ help=(
+ 'Number of seconds to sleep before each download when used alone '
+ 'or a lower bound of a range for randomized sleep before each download '
+ '(minimum possible number of seconds to sleep) when used along with '
+ '--max-sleep-interval.'))
+ workarounds.add_option(
+ '--max-sleep-interval', metavar='SECONDS',
+ dest='max_sleep_interval', type=float,
+ help=(
+ 'Upper bound of a range for randomized sleep before each download '
+ '(maximum possible number of seconds to sleep). Must only be used '
+ 'along with --min-sleep-interval.'))
+
+ verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+ verbosity.add_option(
+ '-q', '--quiet',
+ action='store_true', dest='quiet', default=False,
+ help='Activate quiet mode')
+ verbosity.add_option(
+ '--no-warnings',
+ dest='no_warnings', action='store_true', default=False,
+ help='Ignore warnings')
+ verbosity.add_option(
+ '-s', '--simulate',
+ action='store_true', dest='simulate', default=False,
+ help='Do not download the video and do not write anything to disk')
+ verbosity.add_option(
+ '--skip-download',
+ action='store_true', dest='skip_download', default=False,
+ help='Do not download the video')
+ verbosity.add_option(
+ '-g', '--get-url',
+ action='store_true', dest='geturl', default=False,
+ help='Simulate, quiet but print URL')
+ verbosity.add_option(
+ '-e', '--get-title',
+ action='store_true', dest='gettitle', default=False,
+ help='Simulate, quiet but print title')
+ verbosity.add_option(
+ '--get-id',
+ action='store_true', dest='getid', default=False,
+ help='Simulate, quiet but print id')
+ verbosity.add_option(
+ '--get-thumbnail',
+ action='store_true', dest='getthumbnail', default=False,
+ help='Simulate, quiet but print thumbnail URL')
+ verbosity.add_option(
+ '--get-description',
+ action='store_true', dest='getdescription', default=False,
+ help='Simulate, quiet but print video description')
+ verbosity.add_option(
+ '--get-duration',
+ action='store_true', dest='getduration', default=False,
+ help='Simulate, quiet but print video length')
+ verbosity.add_option(
+ '--get-filename',
+ action='store_true', dest='getfilename', default=False,
+ help='Simulate, quiet but print output filename')
+ verbosity.add_option(
+ '--get-format',
+ action='store_true', dest='getformat', default=False,
+ help='Simulate, quiet but print output format')
+ verbosity.add_option(
+ '-j', '--dump-json',
+ action='store_true', dest='dumpjson', default=False,
+ help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
+ verbosity.add_option(
+ '-J', '--dump-single-json',
+ action='store_true', dest='dump_single_json', default=False,
+ help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ verbosity.add_option(
+ '--print-json',
+ action='store_true', dest='print_json', default=False,
+ help='Be quiet and print the video information as JSON (video is still being downloaded).',
+ )
+ verbosity.add_option(
+ '--newline',
+ action='store_true', dest='progress_with_newline', default=False,
+ help='Output progress bar as new lines')
+ verbosity.add_option(
+ '--no-progress',
+ action='store_true', dest='noprogress', default=False,
+ help='Do not print progress bar')
+ verbosity.add_option(
+ '--console-title',
+ action='store_true', dest='consoletitle', default=False,
+ help='Display progress in console titlebar')
+ verbosity.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='Print various debugging information')
+ verbosity.add_option(
+ '--dump-pages', '--dump-intermediate-pages',
+ action='store_true', dest='dump_intermediate_pages', default=False,
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
+ verbosity.add_option(
+ '--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded intermediary pages to files in the current directory to debug problems')
+ verbosity.add_option(
+ '--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--print-traffic', '--dump-headers',
+ dest='debug_printtraffic', action='store_true', default=False,
+ help='Display sent and read HTTP traffic')
+ verbosity.add_option(
+ '-C', '--call-home',
+ dest='call_home', action='store_true', default=False,
+ help='Contact the youtube-dl server for debugging')
+ verbosity.add_option(
+ '--no-call-home',
+ dest='call_home', action='store_false', default=False,
+ help='Do NOT contact the youtube-dl server for debugging')
+
+ filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ filesystem.add_option(
+ '-a', '--batch-file',
+ dest='batchfile', metavar='FILE',
+ help="File containing URLs to download ('-' for stdin), one URL per line. "
+ "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
+ filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help='Use only video ID in file name')
+ filesystem.add_option(
+ '-o', '--output',
+ dest='outtmpl', metavar='TEMPLATE',
+ help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
+ filesystem.add_option(
+ '--autonumber-size',
+ dest='autonumber_size', metavar='NUMBER', type=int,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--autonumber-start',
+ dest='autonumber_start', metavar='NUMBER', default=1, type=int,
+ help='Specify the start value for %(autonumber)s (default is %default)')
+ filesystem.add_option(
+ '--restrict-filenames',
+ action='store_true', dest='restrictfilenames', default=False,
+ help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+ filesystem.add_option(
+ '-A', '--auto-number',
+ action='store_true', dest='autonumber', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-t', '--title',
+ action='store_true', dest='usetitle', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-l', '--literal', default=False,
+ action='store_true', dest='usetitle',
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-w', '--no-overwrites',
+ action='store_true', dest='nooverwrites', default=False,
+ help='Do not overwrite files')
+ filesystem.add_option(
+ '-c', '--continue',
+ action='store_true', dest='continue_dl', default=True,
+ help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+ filesystem.add_option(
+ '--no-continue',
+ action='store_false', dest='continue_dl',
+ help='Do not resume partially downloaded files (restart from beginning)')
+ filesystem.add_option(
+ '--no-part',
+ action='store_true', dest='nopart', default=False,
+ help='Do not use .part files - write directly into output file')
+ filesystem.add_option(
+ '--no-mtime',
+ action='store_false', dest='updatetime', default=True,
+ help='Do not use the Last-modified header to set the file modification time')
+ filesystem.add_option(
+ '--write-description',
+ action='store_true', dest='writedescription', default=False,
+ help='Write video description to a .description file')
+ filesystem.add_option(
+ '--write-info-json',
+ action='store_true', dest='writeinfojson', default=False,
+ help='Write video metadata to a .info.json file')
+ filesystem.add_option(
+ '--write-annotations',
+ action='store_true', dest='writeannotations', default=False,
+ help='Write video annotations to a .annotations.xml file')
+ filesystem.add_option(
+ '--load-info-json', '--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
+ filesystem.add_option(
+ '--cookies',
+ dest='cookiefile', metavar='FILE',
+ help='File to read cookies from and dump cookie jar in')
+ filesystem.add_option(
+ '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+ help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+ filesystem.add_option(
+ '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+ help='Disable filesystem caching')
+ filesystem.add_option(
+ '--rm-cache-dir',
+ action='store_true', dest='rm_cachedir',
+ help='Delete all filesystem cache files')
+
+ thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
+ thumbnail.add_option(
+ '--write-thumbnail',
+ action='store_true', dest='writethumbnail', default=False,
+ help='Write thumbnail image to disk')
+ thumbnail.add_option(
+ '--write-all-thumbnails',
+ action='store_true', dest='write_all_thumbnails', default=False,
+ help='Write all thumbnail image formats to disk')
+ thumbnail.add_option(
+ '--list-thumbnails',
+ action='store_true', dest='list_thumbnails', default=False,
+ help='Simulate and list all available thumbnail formats')
+
+ postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+ postproc.add_option(
+ '-x', '--extract-audio',
+ action='store_true', dest='extractaudio', default=False,
+ help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ postproc.add_option(
+ '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+ help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
+ postproc.add_option(
+ '--audio-quality', metavar='QUALITY',
+ dest='audioquality', default='5',
+ help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ postproc.add_option(
+ '--recode-video',
+ metavar='FORMAT', dest='recodevideo', default=None,
+ help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+ postproc.add_option(
+ '--postprocessor-args',
+ dest='postprocessor_args', metavar='ARGS',
+ help='Give these arguments to the postprocessor')
+ postproc.add_option(
+ '-k', '--keep-video',
+ action='store_true', dest='keepvideo', default=False,
+ help='Keep the video file on disk after the post-processing; the video is erased by default')
+ postproc.add_option(
+ '--no-post-overwrites',
+ action='store_true', dest='nopostoverwrites', default=False,
+ help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
+ postproc.add_option(
+ '--embed-subs',
+ action='store_true', dest='embedsubtitles', default=False,
+ help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
+ postproc.add_option(
+ '--embed-thumbnail',
+ action='store_true', dest='embedthumbnail', default=False,
+ help='Embed thumbnail in the audio as cover art')
+ postproc.add_option(
+ '--add-metadata',
+ action='store_true', dest='addmetadata', default=False,
+ help='Write metadata to the video file')
+ postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help='Parse additional metadata like song title / artist from the video title. '
+ 'The format syntax is the same as --output. Regular expression with '
+ 'named capture groups may also be used. '
+ 'The parsed parameters replace existing values. '
+ 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ '"Coldplay - Paradise". '
+ 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
+ postproc.add_option(
+ '--xattrs',
+ action='store_true', dest='xattrs', default=False,
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ postproc.add_option(
+ '--fixup',
+ metavar='POLICY', dest='fixup', default='detect_or_warn',
+ help='Automatically correct known faults of the file. '
+ 'One of never (do nothing), warn (only emit a warning), '
+ 'detect_or_warn (the default; fix file if we can, warn otherwise)')
+ postproc.add_option(
+ '--prefer-avconv',
+ action='store_false', dest='prefer_ffmpeg',
+ help='Prefer avconv over ffmpeg for running the postprocessors')
+ postproc.add_option(
+ '--prefer-ffmpeg',
+ action='store_true', dest='prefer_ffmpeg',
+ help='Prefer ffmpeg over avconv for running the postprocessors (default)')
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
+ postproc.add_option(
+ '--exec',
+ metavar='CMD', dest='exec_cmd',
+ help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ postproc.add_option(
+ '--convert-subs', '--convert-subtitles',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
+
+ parser.add_option_group(general)
+ parser.add_option_group(network)
+ parser.add_option_group(geo)
+ parser.add_option_group(selection)
+ parser.add_option_group(downloader)
+ parser.add_option_group(filesystem)
+ parser.add_option_group(thumbnail)
+ parser.add_option_group(verbosity)
+ parser.add_option_group(workarounds)
+ parser.add_option_group(video_format)
+ parser.add_option_group(subtitles)
+ parser.add_option_group(authentication)
+ parser.add_option_group(adobe_pass)
+ parser.add_option_group(postproc)
+
+ if overrideArguments is not None:
+ opts, args = parser.parse_args(overrideArguments)
+ if opts.verbose:
+ write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
+ else:
+ def compat_conf(conf):
+ if sys.version_info < (3,):
+ return [a.decode(preferredencoding(), 'replace') for a in conf]
+ return conf
+
+ command_line_conf = compat_conf(sys.argv[1:])
+ opts, args = parser.parse_args(command_line_conf)
+
+ system_conf = user_conf = custom_conf = []
+
+ if '--config-location' in command_line_conf:
+ location = compat_expanduser(opts.config_location)
+ if os.path.isdir(location):
+ location = os.path.join(location, 'youtube-dl.conf')
+ if not os.path.exists(location):
+ parser.error('config-location %s does not exist.' % location)
+ custom_conf = _readOptions(location)
+ elif '--ignore-config' in command_line_conf:
+ pass
+ else:
+ system_conf = _readOptions('/etc/youtube-dl.conf')
+ if '--ignore-config' not in system_conf:
+ user_conf = _readUserConf()
+
+ argv = system_conf + user_conf + custom_conf + command_line_conf
+ opts, args = parser.parse_args(argv)
+ if opts.verbose:
+ for conf_label, conf in (
+ ('System config', system_conf),
+ ('User config', user_conf),
+ ('Custom config', custom_conf),
+ ('Command-line args', command_line_conf)):
+ write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
+
+ return parser, opts, args
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
new file mode 100644
index 0000000..3ea5183
--- /dev/null
+++ b/youtube_dl/postprocessor/__init__.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+from .embedthumbnail import EmbedThumbnailPP
+from .ffmpeg import (
+ FFmpegPostProcessor,
+ FFmpegEmbedSubtitlePP,
+ FFmpegExtractAudioPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegMergerPP,
+ FFmpegMetadataPP,
+ FFmpegVideoConvertorPP,
+ FFmpegSubtitlesConvertorPP,
+)
+from .xattrpp import XAttrMetadataPP
+from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
+
+
+def get_postprocessor(key):
+ return globals()[key + 'PP']
+
+
+__all__ = [
+ 'EmbedThumbnailPP',
+ 'ExecAfterDownloadPP',
+ 'FFmpegEmbedSubtitlePP',
+ 'FFmpegExtractAudioPP',
+ 'FFmpegFixupM3u8PP',
+ 'FFmpegFixupM4aPP',
+ 'FFmpegFixupStretchedPP',
+ 'FFmpegMergerPP',
+ 'FFmpegMetadataPP',
+ 'FFmpegPostProcessor',
+ 'FFmpegSubtitlesConvertorPP',
+ 'FFmpegVideoConvertorPP',
+ 'MetadataFromTitlePP',
+ 'XAttrMetadataPP',
+]
diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py
new file mode 100644
index 0000000..599dd1d
--- /dev/null
+++ b/youtube_dl/postprocessor/common.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import os
+
+from ..utils import (
+ PostProcessingError,
+ cli_configuration_args,
+ encodeFilename,
+)
+
+
+class PostProcessor(object):
+ """Post Processor class.
+
+ PostProcessor objects can be added to downloaders with their
+ add_post_processor() method. When the downloader has finished a
+ successful download, it will take its internal chain of PostProcessors
+ and start calling the run() method on each one of them, first with
+ an initial argument and then with the returned value of the previous
+ PostProcessor.
+
+ The chain will be stopped if one of them ever returns None or the end
+ of the chain is reached.
+
+ PostProcessor objects follow a "mutual registration" process similar
+ to InfoExtractor objects.
+
+ Optionally PostProcessor can use a list of additional command-line arguments
+ with self._configuration_args.
+ """
+
+ _downloader = None
+
+ def __init__(self, downloader=None):
+ self._downloader = downloader
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this PP."""
+ self._downloader = downloader
+
+ def run(self, information):
+ """Run the PostProcessor.
+
+ The "information" argument is a dictionary like the ones
+ composed by InfoExtractors. The only difference is that this
+ one has an extra field called "filepath" that points to the
+ downloaded file.
+
+ This method returns a tuple, the first element is a list of the files
+ that can be deleted, and the second of which is the updated
+ information.
+
+ In addition, this method may raise a PostProcessingError
+ exception if post processing fails.
+ """
+ return [], information # by default, keep file and do nothing
+
+ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+ try:
+ os.utime(encodeFilename(path), (atime, mtime))
+ except Exception:
+ self._downloader.report_warning(errnote)
+
+ def _configuration_args(self, default=[]):
+ return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
+
+
+class AudioConversionError(PostProcessingError):
+ pass
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
new file mode 100644
index 0000000..56be914
--- /dev/null
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .ffmpeg import FFmpegPostProcessor
+
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote
+)
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+ pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, already_have_thumbnail=False):
+ super(EmbedThumbnailPP, self).__init__(downloader)
+ self._already_have_thumbnail = already_have_thumbnail
+
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ if not info.get('thumbnails'):
+ self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+ return [], info
+
+ thumbnail_filename = info['thumbnails'][-1]['filename']
+
+ if not os.path.exists(encodeFilename(thumbnail_filename)):
+ self._downloader.report_warning(
+ 'Skipping embedding the thumbnail because the file is missing.')
+ return [], info
+
+ if info['ext'] == 'mp3':
+ options = [
+ '-c', 'copy', '-map', '0', '-map', '1',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+
+ self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ elif info['ext'] in ['m4a', 'mp4']:
+ if not check_executable('AtomicParsley', ['-v']):
+ raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+
+ cmd = [encodeFilename('AtomicParsley', True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+
+ self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ # for formats that don't support thumbnails (like 3gp) AtomicParsley
+ # won't create to the temporary file
+ if b'No changes' in stdout:
+ self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+ else:
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ else:
+ raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
+
+ return [], info
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py
new file mode 100644
index 0000000..64dabe7
--- /dev/null
+++ b/youtube_dl/postprocessor/execafterdownload.py
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_quote
+from ..utils import (
+ encodeArgument,
+ PostProcessingError,
+)
+
+
+class ExecAfterDownloadPP(PostProcessor):
+ def __init__(self, downloader, exec_cmd):
+ super(ExecAfterDownloadPP, self).__init__(downloader)
+ self.exec_cmd = exec_cmd
+
+ def run(self, information):
+ cmd = self.exec_cmd
+ if '{}' not in cmd:
+ cmd += ' {}'
+
+ cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
+
+ self._downloader.to_screen('[exec] Executing command: %s' % cmd)
+ retCode = subprocess.call(encodeArgument(cmd), shell=True)
+ if retCode != 0:
+ raise PostProcessingError(
+ 'Command returned error code %d' % retCode)
+
+ return [], information
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
new file mode 100644
index 0000000..757b496
--- /dev/null
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -0,0 +1,613 @@
+from __future__ import unicode_literals
+
+import io
+import os
+import subprocess
+import time
+import re
+
+
+from .common import AudioConversionError, PostProcessor
+
+from ..compat import (
+ compat_subprocess_get_DEVNULL,
+)
+from ..utils import (
+ encodeArgument,
+ encodeFilename,
+ get_exe_version,
+ is_outdated_version,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote,
+ subtitles_filename,
+ dfxp2srt,
+ ISO639Utils,
+ replace_extension,
+)
+
+
+EXT_TO_OUT_FORMATS = {
+ 'aac': 'adts',
+ 'flac': 'flac',
+ 'm4a': 'ipod',
+ 'mka': 'matroska',
+ 'mkv': 'matroska',
+ 'mpg': 'mpeg',
+ 'ogv': 'ogg',
+ 'ts': 'mpegts',
+ 'wma': 'asf',
+ 'wmv': 'asf',
+}
+ACODECS = {
+ 'mp3': 'libmp3lame',
+ 'aac': 'aac',
+ 'flac': 'flac',
+ 'm4a': 'aac',
+ 'opus': 'libopus',
+ 'vorbis': 'libvorbis',
+ 'wav': None,
+}
+
+
+class FFmpegPostProcessorError(PostProcessingError):
+ pass
+
+
+class FFmpegPostProcessor(PostProcessor):
+ def __init__(self, downloader=None):
+ PostProcessor.__init__(self, downloader)
+ self._determine_executables()
+
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+
+ @staticmethod
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor(downloader)._versions
+
+ def _determine_executables(self):
+ programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+ prefer_ffmpeg = True
+
+ self.basename = None
+ self.probe_basename = None
+
+ self._paths = None
+ self._versions = None
+ if self._downloader:
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
+ location = self._downloader.params.get('ffmpeg_location')
+ if location is not None:
+ if not os.path.exists(location):
+ self._downloader.report_warning(
+ 'ffmpeg-location %s does not exist! '
+ 'Continuing without avconv/ffmpeg.' % (location))
+ self._versions = {}
+ return
+ elif not os.path.isdir(location):
+ basename = os.path.splitext(os.path.basename(location))[0]
+ if basename not in programs:
+ self._downloader.report_warning(
+ 'Cannot identify executable %s, its basename should be one of %s. '
+ 'Continuing without avconv/ffmpeg.' %
+ (location, ', '.join(programs)))
+ self._versions = {}
+ return None
+ location = os.path.dirname(os.path.abspath(location))
+ if basename in ('ffmpeg', 'ffprobe'):
+ prefer_ffmpeg = True
+
+ self._paths = dict(
+ (p, os.path.join(location, p)) for p in programs)
+ self._versions = dict(
+ (p, get_exe_version(self._paths[p], args=['-version']))
+ for p in programs)
+ if self._versions is None:
+ self._versions = dict(
+ (p, get_exe_version(p, args=['-version'])) for p in programs)
+ self._paths = dict((p, p) for p in programs)
+
+ if prefer_ffmpeg is False:
+ prefs = ('avconv', 'ffmpeg')
+ else:
+ prefs = ('ffmpeg', 'avconv')
+ for p in prefs:
+ if self._versions[p]:
+ self.basename = p
+ break
+
+ if prefer_ffmpeg is False:
+ prefs = ('avprobe', 'ffprobe')
+ else:
+ prefs = ('ffprobe', 'avprobe')
+ for p in prefs:
+ if self._versions[p]:
+ self.probe_basename = p
+ break
+
+ @property
+ def available(self):
+ return self.basename is not None
+
+ @property
+ def executable(self):
+ return self._paths[self.basename]
+
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
+ @property
+ def probe_executable(self):
+ return self._paths[self.probe_basename]
+
+ def get_audio_codec(self, path):
+ if not self.probe_available:
+ raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
+ try:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams'),
+ encodeFilename(self._ffmpeg_filename_argument(path), True)]
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+ output = handle.communicate()[0]
+ if handle.wait() != 0:
+ return None
+ except (IOError, OSError):
+ return None
+ audio_codec = None
+ for line in output.decode('ascii', 'ignore').split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ return None
+
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+ self.check_version()
+
+ oldest_mtime = min(
+ os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+
+ opts += self._configuration_args()
+
+ files_cmd = []
+ for path in input_paths:
+ files_cmd.extend([
+ encodeArgument('-i'),
+ encodeFilename(self._ffmpeg_filename_argument(path), True)
+ ])
+ cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
+ files_cmd +
+ [encodeArgument(o) for o in opts] +
+ [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode != 0:
+ stderr = stderr.decode('utf-8', 'replace')
+ msg = stderr.strip().split('\n')[-1]
+ raise FFmpegPostProcessorError(msg)
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+
+ def run_ffmpeg(self, path, out_path, opts):
+ self.run_ffmpeg_multiple_files([path], out_path, opts)
+
+ def _ffmpeg_filename_argument(self, fn):
+ # Always use 'file:' because the filename may contain ':' (ffmpeg
+ # interprets that as a protocol) or can start with '-' (-- is broken in
+ # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
+ # Also leave '-' intact in order not to break streaming to stdout.
+ return 'file:' + fn if fn != '-' else fn
+
+
+class FFmpegExtractAudioPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ if preferredcodec is None:
+ preferredcodec = 'best'
+ self._preferredcodec = preferredcodec
+ self._preferredquality = preferredquality
+ self._nopostoverwrites = nopostoverwrites
+
+ def run_ffmpeg(self, path, out_path, codec, more_opts):
+ if codec is None:
+ acodec_opts = []
+ else:
+ acodec_opts = ['-acodec', codec]
+ opts = ['-vn'] + acodec_opts + more_opts
+ try:
+ FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
+ except FFmpegPostProcessorError as err:
+ raise AudioConversionError(err.msg)
+
+ def run(self, information):
+ path = information['filepath']
+
+ filecodec = self.get_audio_codec(path)
+ if filecodec is None:
+ raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
+
+ more_opts = []
+ if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+ if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
+ # Lossless, but in another container
+ acodec = 'copy'
+ extension = 'm4a'
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
+ elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
+ # Lossless if possible
+ acodec = 'copy'
+ extension = filecodec
+ if filecodec == 'aac':
+ more_opts = ['-f', 'adts']
+ if filecodec == 'vorbis':
+ extension = 'ogg'
+ else:
+ # MP3 otherwise.
+ acodec = 'libmp3lame'
+ extension = 'mp3'
+ more_opts = []
+ if self._preferredquality is not None:
+ if int(self._preferredquality) < 10:
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ else:
+ # We convert the audio (lossy if codec is lossy)
+ acodec = ACODECS[self._preferredcodec]
+ extension = self._preferredcodec
+ more_opts = []
+ if self._preferredquality is not None:
+ # The opus codec doesn't support the -aq option
+ if int(self._preferredquality) < 10 and extension != 'opus':
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ if self._preferredcodec == 'aac':
+ more_opts += ['-f', 'adts']
+ if self._preferredcodec == 'm4a':
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
+ if self._preferredcodec == 'vorbis':
+ extension = 'ogg'
+ if self._preferredcodec == 'wav':
+ extension = 'wav'
+ more_opts += ['-f', 'wav']
+
+ prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
+ new_path = prefix + sep + extension
+
+ information['filepath'] = new_path
+ information['ext'] = extension
+
+ # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
+ if (new_path == path or
+ (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
+ return [], information
+
+ try:
+ self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
+ self.run_ffmpeg(path, new_path, acodec, more_opts)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
+
+ # Try to update the date time for extracted audio file.
+ if information.get('filetime') is not None:
+ self.try_utime(
+ new_path, time.time(), information['filetime'],
+ errnote='Cannot update utime of audio file')
+
+ return [path], information
+
+
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferedformat=None):
+ super(FFmpegVideoConvertorPP, self).__init__(downloader)
+ self._preferedformat = preferedformat
+
+ def run(self, information):
+ path = information['filepath']
+ if information['ext'] == self._preferedformat:
+ self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+ return [], information
+ options = []
+ if self._preferedformat == 'avi':
+ options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
+ prefix, sep, ext = path.rpartition('.')
+ outpath = prefix + sep + self._preferedformat
+ self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
+ self.run_ffmpeg(path, outpath, options)
+ information['filepath'] = outpath
+ information['format'] = self._preferedformat
+ information['ext'] = self._preferedformat
+ return [path], information
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ def run(self, information):
+ if information['ext'] not in ('mp4', 'webm', 'mkv'):
+ self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
+ return [], information
+ subtitles = information.get('requested_subtitles')
+ if not subtitles:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
+ return [], information
+
+ filename = information['filepath']
+
+ ext = information['ext']
+ sub_langs = []
+ sub_filenames = []
+ webm_vtt_warn = False
+
+ for lang, sub_info in subtitles.items():
+ sub_ext = sub_info['ext']
+ if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
+ sub_langs.append(lang)
+ sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
+ else:
+ if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+ webm_vtt_warn = True
+ self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+
+ if not sub_langs:
+ return [], information
+
+ input_files = [filename] + sub_filenames
+
+ opts = [
+ '-map', '0',
+ '-c', 'copy',
+ # Don't copy the existing subtitles, we may be running the
+ # postprocessor a second time
+ '-map', '-0:s',
+ ]
+ if information['ext'] == 'mp4':
+ opts += ['-c:s', 'mov_text']
+ for (i, lang) in enumerate(sub_langs):
+ opts.extend(['-map', '%d:0' % (i + 1)])
+ lang_code = ISO639Utils.short2long(lang)
+ if lang_code is not None:
+ opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return sub_filenames, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+ def run(self, info):
+ metadata = {}
+
+ def add(meta_list, info_list=None):
+ if not info_list:
+ info_list = meta_list
+ if not isinstance(meta_list, (list, tuple)):
+ meta_list = (meta_list,)
+ if not isinstance(info_list, (list, tuple)):
+ info_list = (info_list,)
+ for info_f in info_list:
+ if info.get(info_f) is not None:
+ for meta_f in meta_list:
+ metadata[meta_f] = info[info_f]
+ break
+
+ add('title', ('track', 'title'))
+ add('date', 'upload_date')
+ add(('description', 'comment'), 'description')
+ add('purl', 'webpage_url')
+ add('track', 'track_number')
+ add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
+ add('genre')
+ add('album')
+ add('album_artist')
+ add('disc', 'disc_number')
+
+ if not metadata:
+ self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ in_filenames = [filename]
+ options = []
+
+ if info['ext'] == 'm4a':
+ options.extend(['-vn', '-acodec', 'copy'])
+ else:
+ options.extend(['-c', 'copy'])
+
+ for (name, value) in metadata.items():
+ options.extend(['-metadata', '%s=%s' % (name, value)])
+
+ chapters = info.get('chapters', [])
+ if chapters:
+ metadata_filename = replace_extension(filename, 'meta')
+ with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+ def ffmpeg_escape(text):
+ return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
+
+ metadata_file_content = ';FFMETADATA1\n'
+ for chapter in chapters:
+ metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+ metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+ metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+ chapter_title = chapter.get('title')
+ if chapter_title:
+ metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+ f.write(metadata_file_content)
+ in_filenames.append(metadata_filename)
+ options.extend(['-map_metadata', '1'])
+
+ self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
+ if chapters:
+ os.remove(metadata_filename)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegMergerPP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
+ self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
+ self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return info['__files_to_merge'], info
+
+ def can_merge(self):
+ # TODO: figure out merge-capable ffmpeg version
+ if self.basename != 'avconv':
+ return True
+
+ required_version = '10-0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+ 'youtube-dl will download single file media. '
+ 'Update %s to version %s or newer to fix this.') % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+ return False
+ return True
+
+
+class FFmpegFixupStretchedPP(FFmpegPostProcessor):
+ def run(self, info):
+ stretched_ratio = info.get('stretched_ratio')
+ if stretched_ratio is None or stretched_ratio == 1:
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
+ self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM4aPP(FFmpegPostProcessor):
+ def run(self, info):
+ if info.get('container') != 'm4a_dash':
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4']
+ self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ if self.get_audio_codec(filename) == 'aac':
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+ self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ filename = info['filepath']
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ return [], info
+ self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ sub_filenames = []
+ for lang, sub in subs.items():
+ ext = sub['ext']
+ if ext == new_ext:
+ self._downloader.to_screen(
+ '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
+ continue
+ old_file = subtitles_filename(filename, lang, ext)
+ sub_filenames.append(old_file)
+ new_file = subtitles_filename(filename, lang, new_ext)
+
+ if ext in ('dfxp', 'ttml', 'tt'):
+ self._downloader.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = old_file
+ srt_file = subtitles_filename(filename, lang, 'srt')
+
+ with open(dfxp_file, 'rb') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ f.write(srt_data)
+ old_file = srt_file
+
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data
+ }
+
+ if new_ext == 'srt':
+ continue
+ else:
+ sub_filenames.append(srt_file)
+
+ self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+
+ with io.open(new_file, 'rt', encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ }
+
+ return sub_filenames, info
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
new file mode 100644
index 0000000..f5c14d9
--- /dev/null
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+
+
+class MetadataFromTitlePP(PostProcessor):
+ def __init__(self, downloader, titleformat):
+ super(MetadataFromTitlePP, self).__init__(downloader)
+ self._titleformat = titleformat
+ self._titleregex = (self.format_to_regex(titleformat)
+ if re.search(r'%\(\w+\)s', titleformat)
+ else titleformat)
+
+ def format_to_regex(self, fmt):
+ r"""
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ lastpos = 0
+ regex = ''
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += r'(?P<' + match.group(1) + '>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:])
+ return regex
+
+ def run(self, info):
+ title = info['title']
+ match = re.match(self._titleregex, title)
+ if match is None:
+ self._downloader.to_screen(
+ '[fromtitle] Could not interpret title of video as "%s"'
+ % self._titleformat)
+ return [], info
+ for attribute, value in match.groupdict().items():
+ info[attribute] = value
+ self._downloader.to_screen(
+ '[fromtitle] parsed %s: %s'
+ % (attribute, value if value is not None else 'NA'))
+
+ return [], info
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
new file mode 100644
index 0000000..b0aed9c
--- /dev/null
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+from .common import PostProcessor
+from ..compat import compat_os_name
+from ..utils import (
+ hyphenate_date,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+)
+
+
+class XAttrMetadataPP(PostProcessor):
+
+ #
+ # More info about extended attributes for media:
+ # http://freedesktop.org/wiki/CommonExtendedAttributes/
+ # http://www.freedesktop.org/wiki/PhreedomDraft/
+ # http://dublincore.org/documents/usageguide/elements.shtml
+ #
+ # TODO:
+ # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
+ # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
+ #
+
+ def run(self, info):
+ """ Set extended attributes on downloaded file (if xattr support is found). """
+
+ # Write the metadata to the file's xattrs
+ self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
+
+ filename = info['filepath']
+
+ try:
+ xattr_mapping = {
+ 'user.xdg.referrer.url': 'webpage_url',
+ # 'user.xdg.comment': 'description',
+ 'user.dublincore.title': 'title',
+ 'user.dublincore.date': 'upload_date',
+ 'user.dublincore.description': 'description',
+ 'user.dublincore.contributor': 'uploader',
+ 'user.dublincore.format': 'format',
+ }
+
+ num_written = 0
+ for xattrname, infoname in xattr_mapping.items():
+
+ value = info.get(infoname)
+
+ if value:
+ if infoname == 'upload_date':
+ value = hyphenate_date(value)
+
+ byte_value = value.encode('utf-8')
+ write_xattr(filename, xattrname, byte_value)
+ num_written += 1
+
+ return [], info
+
+ except XAttrUnavailableError as e:
+ self._downloader.report_error(str(e))
+ return [], info
+
+ except XAttrMetadataError as e:
+ if e.reason == 'NO_SPACE':
+ self._downloader.report_warning(
+ 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
+ (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+ elif e.reason == 'VALUE_TOO_LONG':
+ self._downloader.report_warning(
+ 'Unable to write extended attributes due to too long values.')
+ else:
+ msg = 'This filesystem doesn\'t support extended attributes. '
+ if compat_os_name == 'nt':
+ msg += 'You need to use NTFS.'
+ else:
+ msg += '(You may have to enable them in your /etc/fstab)'
+ self._downloader.report_error(msg)
+ return [], info
diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py
new file mode 100644
index 0000000..5d4adbe
--- /dev/null
+++ b/youtube_dl/socks.py
@@ -0,0 +1,273 @@
+# Public Domain SOCKS proxy protocol implementation
+# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
+
+from __future__ import unicode_literals
+
+# References:
+# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
+# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
+# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
+# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
+
+import collections
+import socket
+
+from .compat import (
+ compat_ord,
+ compat_struct_pack,
+ compat_struct_unpack,
+)
+
+__author__ = 'Timo Schmid <coding@timoschmid.de>'
+
+SOCKS4_VERSION = 4
+SOCKS4_REPLY_VERSION = 0x00
+# Excerpt from SOCKS4A protocol:
+# if the client cannot resolve the destination host's domain name to find its
+# IP address, it should set the first three bytes of DSTIP to NULL and the last
+# byte to a non-zero value.
+SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
+
+SOCKS5_VERSION = 5
+SOCKS5_USER_AUTH_VERSION = 0x01
+SOCKS5_USER_AUTH_SUCCESS = 0x00
+
+
+class Socks4Command(object):
+ CMD_CONNECT = 0x01
+ CMD_BIND = 0x02
+
+
+class Socks5Command(Socks4Command):
+ CMD_UDP_ASSOCIATE = 0x03
+
+
+class Socks5Auth(object):
+ AUTH_NONE = 0x00
+ AUTH_GSSAPI = 0x01
+ AUTH_USER_PASS = 0x02
+ AUTH_NO_ACCEPTABLE = 0xFF # For server response
+
+
+class Socks5AddressType(object):
+ ATYP_IPV4 = 0x01
+ ATYP_DOMAINNAME = 0x03
+ ATYP_IPV6 = 0x04
+
+
+class ProxyError(socket.error):
+ ERR_SUCCESS = 0x00
+
+ def __init__(self, code=None, msg=None):
+ if code is not None and msg is None:
+ msg = self.CODES.get(code) or 'unknown error'
+ super(ProxyError, self).__init__(code, msg)
+
+
+class InvalidVersionError(ProxyError):
+ def __init__(self, expected_version, got_version):
+ msg = ('Invalid response version from server. Expected {0:02x} got '
+ '{1:02x}'.format(expected_version, got_version))
+ super(InvalidVersionError, self).__init__(0, msg)
+
+
+class Socks4Error(ProxyError):
+ ERR_SUCCESS = 90
+
+ CODES = {
+ 91: 'request rejected or failed',
+ 92: 'request rejected because SOCKS server cannot connect to identd on the client',
+ 93: 'request rejected because the client program and identd report different user-ids'
+ }
+
+
+class Socks5Error(ProxyError):
+ ERR_GENERAL_FAILURE = 0x01
+
+ CODES = {
+ 0x01: 'general SOCKS server failure',
+ 0x02: 'connection not allowed by ruleset',
+ 0x03: 'Network unreachable',
+ 0x04: 'Host unreachable',
+ 0x05: 'Connection refused',
+ 0x06: 'TTL expired',
+ 0x07: 'Command not supported',
+ 0x08: 'Address type not supported',
+ 0xFE: 'unknown username or invalid password',
+ 0xFF: 'all offered authentication methods were rejected'
+ }
+
+
+class ProxyType(object):
+ SOCKS4 = 0
+ SOCKS4A = 1
+ SOCKS5 = 2
+
+
+Proxy = collections.namedtuple('Proxy', (
+ 'type', 'host', 'port', 'username', 'password', 'remote_dns'))
+
+
+class sockssocket(socket.socket):
+ def __init__(self, *args, **kwargs):
+ self._proxy = None
+ super(sockssocket, self).__init__(*args, **kwargs)
+
+ def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
+ assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
+
+ self._proxy = Proxy(proxytype, addr, port, username, password, rdns)
+
+ def recvall(self, cnt):
+ data = b''
+ while len(data) < cnt:
+ cur = self.recv(cnt - len(data))
+ if not cur:
+ raise EOFError('{0} bytes missing'.format(cnt - len(data)))
+ data += cur
+ return data
+
+ def _recv_bytes(self, cnt):
+ data = self.recvall(cnt)
+ return compat_struct_unpack('!{0}B'.format(cnt), data)
+
+ @staticmethod
+ def _len_and_data(data):
+ return compat_struct_pack('!B', len(data)) + data
+
+ def _check_response_version(self, expected_version, got_version):
+ if got_version != expected_version:
+ self.close()
+ raise InvalidVersionError(expected_version, got_version)
+
+ def _resolve_address(self, destaddr, default, use_remote_dns):
+ try:
+ return socket.inet_aton(destaddr)
+ except socket.error:
+ if use_remote_dns and self._proxy.remote_dns:
+ return default
+ else:
+ return socket.inet_aton(socket.gethostbyname(destaddr))
+
+ def _setup_socks4(self, address, is_4a=False):
+ destaddr, port = address
+
+ ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
+
+ packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+
+ username = (self._proxy.username or '').encode('utf-8')
+ packet += username + b'\x00'
+
+ if is_4a and self._proxy.remote_dns:
+ packet += destaddr.encode('utf-8') + b'\x00'
+
+ self.sendall(packet)
+
+ version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
+
+ self._check_response_version(SOCKS4_REPLY_VERSION, version)
+
+ if resp_code != Socks4Error.ERR_SUCCESS:
+ self.close()
+ raise Socks4Error(resp_code)
+
+ return (dsthost, dstport)
+
+ def _setup_socks4a(self, address):
+ self._setup_socks4(address, is_4a=True)
+
+ def _socks5_auth(self):
+ packet = compat_struct_pack('!B', SOCKS5_VERSION)
+
+ auth_methods = [Socks5Auth.AUTH_NONE]
+ if self._proxy.username and self._proxy.password:
+ auth_methods.append(Socks5Auth.AUTH_USER_PASS)
+
+ packet += compat_struct_pack('!B', len(auth_methods))
+ packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
+
+ self.sendall(packet)
+
+ version, method = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
+ method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
+ self.close()
+ raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
+
+ if method == Socks5Auth.AUTH_USER_PASS:
+ username = self._proxy.username.encode('utf-8')
+ password = self._proxy.password.encode('utf-8')
+ packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
+ packet += self._len_and_data(username) + self._len_and_data(password)
+ self.sendall(packet)
+
+ version, status = self._recv_bytes(2)
+
+ self._check_response_version(SOCKS5_USER_AUTH_VERSION, version)
+
+ if status != SOCKS5_USER_AUTH_SUCCESS:
+ self.close()
+ raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE)
+
+ def _setup_socks5(self, address):
+ destaddr, port = address
+
+ ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+
+ self._socks5_auth()
+
+ reserved = 0
+ packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+ if ipaddr is None:
+ destaddr = destaddr.encode('utf-8')
+ packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+ packet += self._len_and_data(destaddr)
+ else:
+ packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+ packet += compat_struct_pack('!H', port)
+
+ self.sendall(packet)
+
+ version, status, reserved, atype = self._recv_bytes(4)
+
+ self._check_response_version(SOCKS5_VERSION, version)
+
+ if status != Socks5Error.ERR_SUCCESS:
+ self.close()
+ raise Socks5Error(status)
+
+ if atype == Socks5AddressType.ATYP_IPV4:
+ destaddr = self.recvall(4)
+ elif atype == Socks5AddressType.ATYP_DOMAINNAME:
+ alen = compat_ord(self.recv(1))
+ destaddr = self.recvall(alen)
+ elif atype == Socks5AddressType.ATYP_IPV6:
+ destaddr = self.recvall(16)
+ destport = compat_struct_unpack('!H', self.recvall(2))[0]
+
+ return (destaddr, destport)
+
+ def _make_proxy(self, connect_func, address):
+ if not self._proxy:
+ return connect_func(self, address)
+
+ result = connect_func(self, (self._proxy.host, self._proxy.port))
+ if result != 0 and result is not None:
+ return result
+ setup_funcs = {
+ ProxyType.SOCKS4: self._setup_socks4,
+ ProxyType.SOCKS4A: self._setup_socks4a,
+ ProxyType.SOCKS5: self._setup_socks5,
+ }
+ setup_funcs[self._proxy.type](address)
+ return result
+
+ def connect(self, address):
+ self._make_proxy(socket.socket.connect, address)
+
+ def connect_ex(self, address):
+ return self._make_proxy(socket.socket.connect_ex, address)
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
new file mode 100644
index 0000000..0c71585
--- /dev/null
+++ b/youtube_dl/swfinterp.py
@@ -0,0 +1,834 @@
+from __future__ import unicode_literals
+
+import collections
+import io
+import zlib
+
+from .compat import (
+ compat_str,
+ compat_struct_unpack,
+)
+from .utils import (
+ ExtractorError,
+)
+
+
+def _extract_tags(file_contents):
+ if file_contents[1:3] != b'WS':
+ raise ExtractorError(
+ 'Not an SWF file; header is %r' % file_contents[:3])
+ if file_contents[:1] == b'C':
+ content = zlib.decompress(file_contents[8:])
+ else:
+ raise NotImplementedError(
+ 'Unsupported compression format %r' %
+ file_contents[:1])
+
+ # Determine number of bits in framesize rectangle
+ framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
+ framesize_len = (5 + 4 * framesize_nbits + 7) // 8
+
+ pos = framesize_len + 2 + 2
+ while pos < len(content):
+ header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
+ pos += 2
+ tag_code = header16 >> 6
+ tag_len = header16 & 0x3f
+ if tag_len == 0x3f:
+ tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
+ pos += 4
+ assert pos + tag_len <= len(content), \
+ ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
+ % (tag_code, pos, tag_len, len(content)))
+ yield (tag_code, content[pos:pos + tag_len])
+ pos += tag_len
+
+
+class _AVMClass_Object(object):
+ def __init__(self, avm_class):
+ self.avm_class = avm_class
+
+ def __repr__(self):
+ return '%s#%x' % (self.avm_class.name, id(self))
+
+
+class _ScopeDict(dict):
+ def __init__(self, avm_class):
+ super(_ScopeDict, self).__init__()
+ self.avm_class = avm_class
+
+ def __repr__(self):
+ return '%s__Scope(%s)' % (
+ self.avm_class.name,
+ super(_ScopeDict, self).__repr__())
+
+
+class _AVMClass(object):
+ def __init__(self, name_idx, name, static_properties=None):
+ self.name_idx = name_idx
+ self.name = name
+ self.method_names = {}
+ self.method_idxs = {}
+ self.methods = {}
+ self.method_pyfunctions = {}
+ self.static_properties = static_properties if static_properties else {}
+
+ self.variables = _ScopeDict(self)
+ self.constants = {}
+
+ def make_object(self):
+ return _AVMClass_Object(self)
+
+ def __repr__(self):
+ return '_AVMClass(%s)' % (self.name)
+
+ def register_methods(self, methods):
+ self.method_names.update(methods.items())
+ self.method_idxs.update(dict(
+ (idx, name)
+ for name, idx in methods.items()))
+
+
+class _Multiname(object):
+ def __init__(self, kind):
+ self.kind = kind
+
+ def __repr__(self):
+ return '[MULTINAME kind: 0x%x]' % self.kind
+
+
+def _read_int(reader):
+ res = 0
+ shift = 0
+ for _ in range(5):
+ buf = reader.read(1)
+ assert len(buf) == 1
+ b = compat_struct_unpack('<B', buf)[0]
+ res = res | ((b & 0x7f) << shift)
+ if b & 0x80 == 0:
+ break
+ shift += 7
+ return res
+
+
+def _u30(reader):
+ res = _read_int(reader)
+ assert res & 0xf0000000 == 0
+ return res
+
+
+_u32 = _read_int
+
+
+def _s32(reader):
+ v = _read_int(reader)
+ if v & 0x80000000 != 0:
+ v = - ((v ^ 0xffffffff) + 1)
+ return v
+
+
+def _s24(reader):
+ bs = reader.read(3)
+ assert len(bs) == 3
+ last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
+ return compat_struct_unpack('<i', bs + last_byte)[0]
+
+
+def _read_string(reader):
+ slen = _u30(reader)
+ resb = reader.read(slen)
+ assert len(resb) == slen
+ return resb.decode('utf-8')
+
+
+def _read_bytes(count, reader):
+ assert count >= 0
+ resb = reader.read(count)
+ assert len(resb) == count
+ return resb
+
+
+def _read_byte(reader):
+ resb = _read_bytes(1, reader=reader)
+ res = compat_struct_unpack('<B', resb)[0]
+ return res
+
+
+StringClass = _AVMClass('(no name idx)', 'String')
+ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray')
+TimerClass = _AVMClass('(no name idx)', 'Timer')
+TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
+_builtin_classes = {
+ StringClass.name: StringClass,
+ ByteArrayClass.name: ByteArrayClass,
+ TimerClass.name: TimerClass,
+ TimerEventClass.name: TimerEventClass,
+}
+
+
+class _Undefined(object):
+ def __bool__(self):
+ return False
+ __nonzero__ = __bool__
+
+ def __hash__(self):
+ return 0
+
+ def __str__(self):
+ return 'undefined'
+ __repr__ = __str__
+
+
+undefined = _Undefined()
+
+
+class SWFInterpreter(object):
+ def __init__(self, file_contents):
+ self._patched_functions = {
+ (TimerClass, 'addEventListener'): lambda params: undefined,
+ }
+ code_tag = next(tag
+ for tag_code, tag in _extract_tags(file_contents)
+ if tag_code == 82)
+ p = code_tag.index(b'\0', 4) + 1
+ code_reader = io.BytesIO(code_tag[p:])
+
+ # Parse ABC (AVM2 ByteCode)
+
+ # Define a couple convenience methods
+ u30 = lambda *args: _u30(*args, reader=code_reader)
+ s32 = lambda *args: _s32(*args, reader=code_reader)
+ u32 = lambda *args: _u32(*args, reader=code_reader)
+ read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
+ read_byte = lambda *args: _read_byte(*args, reader=code_reader)
+
+ # minor_version + major_version
+ read_bytes(2 + 2)
+
+ # Constant pool
+ int_count = u30()
+ self.constant_ints = [0]
+ for _c in range(1, int_count):
+ self.constant_ints.append(s32())
+ self.constant_uints = [0]
+ uint_count = u30()
+ for _c in range(1, uint_count):
+ self.constant_uints.append(u32())
+ double_count = u30()
+ read_bytes(max(0, (double_count - 1)) * 8)
+ string_count = u30()
+ self.constant_strings = ['']
+ for _c in range(1, string_count):
+ s = _read_string(code_reader)
+ self.constant_strings.append(s)
+ namespace_count = u30()
+ for _c in range(1, namespace_count):
+ read_bytes(1) # kind
+ u30() # name
+ ns_set_count = u30()
+ for _c in range(1, ns_set_count):
+ count = u30()
+ for _c2 in range(count):
+ u30()
+ multiname_count = u30()
+ MULTINAME_SIZES = {
+ 0x07: 2, # QName
+ 0x0d: 2, # QNameA
+ 0x0f: 1, # RTQName
+ 0x10: 1, # RTQNameA
+ 0x11: 0, # RTQNameL
+ 0x12: 0, # RTQNameLA
+ 0x09: 2, # Multiname
+ 0x0e: 2, # MultinameA
+ 0x1b: 1, # MultinameL
+ 0x1c: 1, # MultinameLA
+ }
+ self.multinames = ['']
+ for _c in range(1, multiname_count):
+ kind = u30()
+ assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
+ if kind == 0x07:
+ u30() # namespace_idx
+ name_idx = u30()
+ self.multinames.append(self.constant_strings[name_idx])
+ elif kind == 0x09:
+ name_idx = u30()
+ u30()
+ self.multinames.append(self.constant_strings[name_idx])
+ else:
+ self.multinames.append(_Multiname(kind))
+ for _c2 in range(MULTINAME_SIZES[kind]):
+ u30()
+
+ # Methods
+ method_count = u30()
+ MethodInfo = collections.namedtuple(
+ 'MethodInfo',
+ ['NEED_ARGUMENTS', 'NEED_REST'])
+ method_infos = []
+ for method_id in range(method_count):
+ param_count = u30()
+ u30() # return type
+ for _ in range(param_count):
+ u30() # param type
+ u30() # name index (always 0 for youtube)
+ flags = read_byte()
+ if flags & 0x08 != 0:
+ # Options present
+ option_count = u30()
+ for c in range(option_count):
+ u30() # val
+ read_bytes(1) # kind
+ if flags & 0x80 != 0:
+ # Param names present
+ for _ in range(param_count):
+ u30() # param name
+ mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
+ method_infos.append(mi)
+
+ # Metadata
+ metadata_count = u30()
+ for _c in range(metadata_count):
+ u30() # name
+ item_count = u30()
+ for _c2 in range(item_count):
+ u30() # key
+ u30() # value
+
+ def parse_traits_info():
+ trait_name_idx = u30()
+ kind_full = read_byte()
+ kind = kind_full & 0x0f
+ attrs = kind_full >> 4
+ methods = {}
+ constants = None
+ if kind == 0x00: # Slot
+ u30() # Slot id
+ u30() # type_name_idx
+ vindex = u30()
+ if vindex != 0:
+ read_byte() # vkind
+ elif kind == 0x06: # Const
+ u30() # Slot id
+ u30() # type_name_idx
+ vindex = u30()
+ vkind = 'any'
+ if vindex != 0:
+ vkind = read_byte()
+ if vkind == 0x03: # Constant_Int
+ value = self.constant_ints[vindex]
+ elif vkind == 0x04: # Constant_UInt
+ value = self.constant_uints[vindex]
+ else:
+ return {}, None # Ignore silently for now
+ constants = {self.multinames[trait_name_idx]: value}
+ elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter
+ u30() # disp_id
+ method_idx = u30()
+ methods[self.multinames[trait_name_idx]] = method_idx
+ elif kind == 0x04: # Class
+ u30() # slot_id
+ u30() # classi
+ elif kind == 0x05: # Function
+ u30() # slot_id
+ function_idx = u30()
+ methods[function_idx] = self.multinames[trait_name_idx]
+ else:
+ raise ExtractorError('Unsupported trait kind %d' % kind)
+
+ if attrs & 0x4 != 0: # Metadata present
+ metadata_count = u30()
+ for _c3 in range(metadata_count):
+ u30() # metadata index
+
+ return methods, constants
+
+ # Classes
+ class_count = u30()
+ classes = []
+ for class_id in range(class_count):
+ name_idx = u30()
+
+ cname = self.multinames[name_idx]
+ avm_class = _AVMClass(name_idx, cname)
+ classes.append(avm_class)
+
+ u30() # super_name idx
+ flags = read_byte()
+ if flags & 0x08 != 0: # Protected namespace is present
+ u30() # protected_ns_idx
+ intrf_count = u30()
+ for _c2 in range(intrf_count):
+ u30()
+ u30() # iinit
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ trait_methods, trait_constants = parse_traits_info()
+ avm_class.register_methods(trait_methods)
+ if trait_constants:
+ avm_class.constants.update(trait_constants)
+
+ assert len(classes) == class_count
+ self._classes_by_name = dict((c.name, c) for c in classes)
+
+ for avm_class in classes:
+ avm_class.cinit_idx = u30()
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ trait_methods, trait_constants = parse_traits_info()
+ avm_class.register_methods(trait_methods)
+ if trait_constants:
+ avm_class.constants.update(trait_constants)
+
+ # Scripts
+ script_count = u30()
+ for _c in range(script_count):
+ u30() # init
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ parse_traits_info()
+
+ # Method bodies
+ method_body_count = u30()
+ Method = collections.namedtuple('Method', ['code', 'local_count'])
+ self._all_methods = []
+ for _c in range(method_body_count):
+ method_idx = u30()
+ u30() # max_stack
+ local_count = u30()
+ u30() # init_scope_depth
+ u30() # max_scope_depth
+ code_length = u30()
+ code = read_bytes(code_length)
+ m = Method(code, local_count)
+ self._all_methods.append(m)
+ for avm_class in classes:
+ if method_idx in avm_class.method_idxs:
+ avm_class.methods[avm_class.method_idxs[method_idx]] = m
+ exception_count = u30()
+ for _c2 in range(exception_count):
+ u30() # from
+ u30() # to
+ u30() # target
+ u30() # exc_type
+ u30() # var_name
+ trait_count = u30()
+ for _c2 in range(trait_count):
+ parse_traits_info()
+
+ assert p + code_reader.tell() == len(code_tag)
+
+ def patch_function(self, avm_class, func_name, f):
+ self._patched_functions[(avm_class, func_name)] = f
+
+ def extract_class(self, class_name, call_cinit=True):
+ try:
+ res = self._classes_by_name[class_name]
+ except KeyError:
+ raise ExtractorError('Class %r not found' % class_name)
+
+ if call_cinit and hasattr(res, 'cinit_idx'):
+ res.register_methods({'$cinit': res.cinit_idx})
+ res.methods['$cinit'] = self._all_methods[res.cinit_idx]
+ cinit = self.extract_function(res, '$cinit')
+ cinit([])
+
+ return res
+
+ def extract_function(self, avm_class, func_name):
+ p = self._patched_functions.get((avm_class, func_name))
+ if p:
+ return p
+ if func_name in avm_class.method_pyfunctions:
+ return avm_class.method_pyfunctions[func_name]
+ if func_name in self._classes_by_name:
+ return self._classes_by_name[func_name].make_object()
+ if func_name not in avm_class.methods:
+ raise ExtractorError('Cannot find function %s.%s' % (
+ avm_class.name, func_name))
+ m = avm_class.methods[func_name]
+
+ def resfunc(args):
+ # Helper functions
+ coder = io.BytesIO(m.code)
+ s24 = lambda: _s24(coder)
+ u30 = lambda: _u30(coder)
+
+ registers = [avm_class.variables] + list(args) + [None] * m.local_count
+ stack = []
+ scopes = collections.deque([
+ self._classes_by_name, avm_class.constants, avm_class.variables])
+ while True:
+ opcode = _read_byte(coder)
+ if opcode == 9: # label
+ pass # Spec says: "Do nothing."
+ elif opcode == 16: # jump
+ offset = s24()
+ coder.seek(coder.tell() + offset)
+ elif opcode == 17: # iftrue
+ offset = s24()
+ value = stack.pop()
+ if value:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 18: # iffalse
+ offset = s24()
+ value = stack.pop()
+ if not value:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 19: # ifeq
+ offset = s24()
+ value2 = stack.pop()
+ value1 = stack.pop()
+ if value2 == value1:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 20: # ifne
+ offset = s24()
+ value2 = stack.pop()
+ value1 = stack.pop()
+ if value2 != value1:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 21: # iflt
+ offset = s24()
+ value2 = stack.pop()
+ value1 = stack.pop()
+ if value1 < value2:
+ coder.seek(coder.tell() + offset)
+ elif opcode == 32: # pushnull
+ stack.append(None)
+ elif opcode == 33: # pushundefined
+ stack.append(undefined)
+ elif opcode == 36: # pushbyte
+ v = _read_byte(coder)
+ stack.append(v)
+ elif opcode == 37: # pushshort
+ v = u30()
+ stack.append(v)
+ elif opcode == 38: # pushtrue
+ stack.append(True)
+ elif opcode == 39: # pushfalse
+ stack.append(False)
+ elif opcode == 40: # pushnan
+ stack.append(float('NaN'))
+ elif opcode == 42: # dup
+ value = stack[-1]
+ stack.append(value)
+ elif opcode == 44: # pushstring
+ idx = u30()
+ stack.append(self.constant_strings[idx])
+ elif opcode == 48: # pushscope
+ new_scope = stack.pop()
+ scopes.append(new_scope)
+ elif opcode == 66: # construct
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ res = obj.avm_class.make_object()
+ stack.append(res)
+ elif opcode == 70: # callproperty
+ index = u30()
+ mname = self.multinames[index]
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+
+ if obj == StringClass:
+ if mname == 'String':
+ assert len(args) == 1
+ assert isinstance(args[0], (
+ int, compat_str, _Undefined))
+ if args[0] == undefined:
+ res = 'undefined'
+ else:
+ res = compat_str(args[0])
+ stack.append(res)
+ continue
+ else:
+ raise NotImplementedError(
+ 'Function String.%s is not yet implemented'
+ % mname)
+ elif isinstance(obj, _AVMClass_Object):
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ stack.append(res)
+ continue
+ elif isinstance(obj, _AVMClass):
+ func = self.extract_function(obj, mname)
+ res = func(args)
+ stack.append(res)
+ continue
+ elif isinstance(obj, _ScopeDict):
+ if mname in obj.avm_class.method_names:
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ else:
+ res = obj[mname]
+ stack.append(res)
+ continue
+ elif isinstance(obj, compat_str):
+ if mname == 'split':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ if args[0] == '':
+ res = list(obj)
+ else:
+ res = obj.split(args[0])
+ stack.append(res)
+ continue
+ elif mname == 'charCodeAt':
+ assert len(args) <= 1
+ idx = 0 if len(args) == 0 else args[0]
+ assert isinstance(idx, int)
+ res = ord(obj[idx])
+ stack.append(res)
+ continue
+ elif isinstance(obj, list):
+ if mname == 'slice':
+ assert len(args) == 1
+ assert isinstance(args[0], int)
+ res = obj[args[0]:]
+ stack.append(res)
+ continue
+ elif mname == 'join':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ res = args[0].join(obj)
+ stack.append(res)
+ continue
+ raise NotImplementedError(
+ 'Unsupported property %r on %r'
+ % (mname, obj))
+ elif opcode == 71: # returnvoid
+ res = undefined
+ return res
+ elif opcode == 72: # returnvalue
+ res = stack.pop()
+ return res
+ elif opcode == 73: # constructsuper
+ # Not yet implemented, just hope it works without it
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ elif opcode == 74: # constructproperty
+ index = u30()
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+
+ mname = self.multinames[index]
+ assert isinstance(obj, _AVMClass)
+
+ # We do not actually call the constructor for now;
+ # we just pretend it does nothing
+ stack.append(obj.make_object())
+ elif opcode == 79: # callpropvoid
+ index = u30()
+ mname = self.multinames[index]
+ arg_count = u30()
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ if isinstance(obj, _AVMClass_Object):
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ assert res is undefined
+ continue
+ if isinstance(obj, _ScopeDict):
+ assert mname in obj.avm_class.method_names
+ func = self.extract_function(obj.avm_class, mname)
+ res = func(args)
+ assert res is undefined
+ continue
+ if mname == 'reverse':
+ assert isinstance(obj, list)
+ obj.reverse()
+ else:
+ raise NotImplementedError(
+ 'Unsupported (void) property %r on %r'
+ % (mname, obj))
+ elif opcode == 86: # newarray
+ arg_count = u30()
+ arr = []
+ for i in range(arg_count):
+ arr.append(stack.pop())
+ arr = arr[::-1]
+ stack.append(arr)
+ elif opcode == 93: # findpropstrict
+ index = u30()
+ mname = self.multinames[index]
+ for s in reversed(scopes):
+ if mname in s:
+ res = s
+ break
+ else:
+ res = scopes[0]
+ if mname not in res and mname in _builtin_classes:
+ stack.append(_builtin_classes[mname])
+ else:
+ stack.append(res[mname])
+ elif opcode == 94: # findproperty
+ index = u30()
+ mname = self.multinames[index]
+ for s in reversed(scopes):
+ if mname in s:
+ res = s
+ break
+ else:
+ res = avm_class.variables
+ stack.append(res)
+ elif opcode == 96: # getlex
+ index = u30()
+ mname = self.multinames[index]
+ for s in reversed(scopes):
+ if mname in s:
+ scope = s
+ break
+ else:
+ scope = avm_class.variables
+
+ if mname in scope:
+ res = scope[mname]
+ elif mname in _builtin_classes:
+ res = _builtin_classes[mname]
+ else:
+ # Assume uninitialized
+ # TODO warn here
+ res = undefined
+ stack.append(res)
+ elif opcode == 97: # setproperty
+ index = u30()
+ value = stack.pop()
+ idx = self.multinames[index]
+ if isinstance(idx, _Multiname):
+ idx = stack.pop()
+ obj = stack.pop()
+ obj[idx] = value
+ elif opcode == 98: # getlocal
+ index = u30()
+ stack.append(registers[index])
+ elif opcode == 99: # setlocal
+ index = u30()
+ value = stack.pop()
+ registers[index] = value
+ elif opcode == 102: # getproperty
+ index = u30()
+ pname = self.multinames[index]
+ if pname == 'length':
+ obj = stack.pop()
+ assert isinstance(obj, (compat_str, list))
+ stack.append(len(obj))
+ elif isinstance(pname, compat_str): # Member access
+ obj = stack.pop()
+ if isinstance(obj, _AVMClass):
+ res = obj.static_properties[pname]
+ stack.append(res)
+ continue
+
+ assert isinstance(obj, (dict, _ScopeDict)),\
+ 'Accessing member %r on %r' % (pname, obj)
+ res = obj.get(pname, undefined)
+ stack.append(res)
+ else: # Assume attribute access
+ idx = stack.pop()
+ assert isinstance(idx, int)
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ stack.append(obj[idx])
+ elif opcode == 104: # initproperty
+ index = u30()
+ value = stack.pop()
+ idx = self.multinames[index]
+ if isinstance(idx, _Multiname):
+ idx = stack.pop()
+ obj = stack.pop()
+ obj[idx] = value
+ elif opcode == 115: # convert_
+ value = stack.pop()
+ intvalue = int(value)
+ stack.append(intvalue)
+ elif opcode == 128: # coerce
+ u30()
+ elif opcode == 130: # coerce_a
+ value = stack.pop()
+ # um, yes, it's any value
+ stack.append(value)
+ elif opcode == 133: # coerce_s
+ assert isinstance(stack[-1], (type(None), compat_str))
+ elif opcode == 147: # decrement
+ value = stack.pop()
+ assert isinstance(value, int)
+ stack.append(value - 1)
+ elif opcode == 149: # typeof
+ value = stack.pop()
+ return {
+ _Undefined: 'undefined',
+ compat_str: 'String',
+ int: 'Number',
+ float: 'Number',
+ }[type(value)]
+ elif opcode == 160: # add
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 + value2
+ stack.append(res)
+ elif opcode == 161: # subtract
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 - value2
+ stack.append(res)
+ elif opcode == 162: # multiply
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 * value2
+ stack.append(res)
+ elif opcode == 164: # modulo
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 % value2
+ stack.append(res)
+ elif opcode == 168: # bitand
+ value2 = stack.pop()
+ value1 = stack.pop()
+ assert isinstance(value1, int)
+ assert isinstance(value2, int)
+ res = value1 & value2
+ stack.append(res)
+ elif opcode == 171: # equals
+ value2 = stack.pop()
+ value1 = stack.pop()
+ result = value1 == value2
+ stack.append(result)
+ elif opcode == 175: # greaterequals
+ value2 = stack.pop()
+ value1 = stack.pop()
+ result = value1 >= value2
+ stack.append(result)
+ elif opcode == 192: # increment_i
+ value = stack.pop()
+ assert isinstance(value, int)
+ stack.append(value + 1)
+ elif opcode == 208: # getlocal_0
+ stack.append(registers[0])
+ elif opcode == 209: # getlocal_1
+ stack.append(registers[1])
+ elif opcode == 210: # getlocal_2
+ stack.append(registers[2])
+ elif opcode == 211: # getlocal_3
+ stack.append(registers[3])
+ elif opcode == 212: # setlocal_0
+ registers[0] = stack.pop()
+ elif opcode == 213: # setlocal_1
+ registers[1] = stack.pop()
+ elif opcode == 214: # setlocal_2
+ registers[2] = stack.pop()
+ elif opcode == 215: # setlocal_3
+ registers[3] = stack.pop()
+ else:
+ raise NotImplementedError(
+ 'Unsupported opcode %d' % opcode)
+
+ avm_class.method_pyfunctions[func_name] = resfunc
+ return resfunc
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
new file mode 100644
index 0000000..ebce966
--- /dev/null
+++ b/youtube_dl/update.py
@@ -0,0 +1,187 @@
+from __future__ import unicode_literals
+
+import io
+import json
+import traceback
+import hashlib
+import os
+import subprocess
+import sys
+from zipimport import zipimporter
+
+from .utils import encode_compat_str
+
+from .version import __version__
+
+
+def rsa_verify(message, signature, key):
+ from hashlib import sha256
+ assert isinstance(message, bytes)
+ byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8
+ signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode()
+ signature = (byte_size * 2 - len(signature)) * b'0' + signature
+ asn1 = b'3031300d060960864801650304020105000420'
+ asn1 += sha256(message).hexdigest().encode()
+ if byte_size < len(asn1) // 2 + 11:
+ return False
+ expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1
+ return expected == signature
+
+
+def update_self(to_screen, verbose, opener):
+ """Update the program file with the latest version from the repository"""
+
+ UPDATE_URL = 'https://rg3.github.io/youtube-dl/update/'
+ VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
+ JSON_URL = UPDATE_URL + 'versions.json'
+ UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
+
+ if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
+ to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
+ return
+
+ # Check if there is a new version
+ try:
+ newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
+ except Exception:
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t find the current version. Please try again later.')
+ return
+ if newversion == __version__:
+ to_screen('youtube-dl is up-to-date (' + __version__ + ')')
+ return
+
+ # Download and check versions info
+ try:
+ versions_info = opener.open(JSON_URL).read().decode('utf-8')
+ versions_info = json.loads(versions_info)
+ except Exception:
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t obtain versions info. Please try again later.')
+ return
+ if 'signature' not in versions_info:
+ to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
+ return
+ signature = versions_info['signature']
+ del versions_info['signature']
+ if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
+ to_screen('ERROR: the versions file signature is invalid. Aborting.')
+ return
+
+ version_id = versions_info['latest']
+
+ def version_tuple(version_str):
+ return tuple(map(int, version_str.split('.')))
+ if version_tuple(__version__) >= version_tuple(version_id):
+ to_screen('youtube-dl is up to date (%s)' % __version__)
+ return
+
+ to_screen('Updating to version ' + version_id + ' ...')
+ version = versions_info['versions'][version_id]
+
+ print_notes(to_screen, versions_info['versions'])
+
+ # sys.executable is set to the full pathname of the exe-file for py2exe
+ filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
+
+ if not os.access(filename, os.W_OK):
+ to_screen('ERROR: no write permissions on %s' % filename)
+ return
+
+ # Py2EXE
+ if hasattr(sys, 'frozen'):
+ exe = filename
+ directory = os.path.dirname(exe)
+ if not os.access(directory, os.W_OK):
+ to_screen('ERROR: no write permissions on %s' % directory)
+ return
+
+ try:
+ urlh = opener.open(version['exe'][0])
+ newcontent = urlh.read()
+ urlh.close()
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
+ return
+
+ newcontent_hash = hashlib.sha256(newcontent).hexdigest()
+ if newcontent_hash != version['exe'][1]:
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
+ return
+
+ try:
+ with open(exe + '.new', 'wb') as outf:
+ outf.write(newcontent)
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to write the new version')
+ return
+
+ try:
+ bat = os.path.join(directory, 'youtube-dl-updater.bat')
+ with io.open(bat, 'w') as batfile:
+ batfile.write('''
+@echo off
+echo Waiting for file handle to be closed ...
+ping 127.0.0.1 -n 5 -w 1000 > NUL
+move /Y "%s.new" "%s" > NUL
+echo Updated youtube-dl to version %s.
+start /b "" cmd /c del "%%~f0"&exit /b"
+ \n''' % (exe, exe, version_id))
+
+ subprocess.Popen([bat]) # Continues to run in the background
+ return # Do not show premature success messages
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
+ return
+
+ # Zip unix package
+ elif isinstance(globals().get('__loader__'), zipimporter):
+ try:
+ urlh = opener.open(version['bin'][0])
+ newcontent = urlh.read()
+ urlh.close()
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
+ return
+
+ newcontent_hash = hashlib.sha256(newcontent).hexdigest()
+ if newcontent_hash != version['bin'][1]:
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
+ return
+
+ try:
+ with open(filename, 'wb') as outf:
+ outf.write(newcontent)
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
+ return
+
+ to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+
+
+def get_notes(versions, fromVersion):
+ notes = []
+ for v, vdata in sorted(versions.items()):
+ if v > fromVersion:
+ notes.extend(vdata.get('notes', []))
+ return notes
+
+
+def print_notes(to_screen, versions, fromVersion=__version__):
+ notes = get_notes(versions, fromVersion)
+ if notes:
+ to_screen('PLEASE NOTE:')
+ for note in notes:
+ to_screen(note)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
new file mode 100644
index 0000000..8c45166
--- /dev/null
+++ b/youtube_dl/utils.py
@@ -0,0 +1,3911 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import calendar
+import codecs
+import contextlib
+import ctypes
+import datetime
+import email.utils
+import email.header
+import errno
+import functools
+import gzip
+import io
+import itertools
+import json
+import locale
+import math
+import operator
+import os
+import platform
+import random
+import re
+import socket
+import ssl
+import subprocess
+import sys
+import tempfile
+import traceback
+import xml.etree.ElementTree
+import zlib
+
+from .compat import (
+ compat_HTMLParseError,
+ compat_HTMLParser,
+ compat_basestring,
+ compat_chr,
+ compat_ctypes_WINFUNCTYPE,
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_html_entities,
+ compat_html_entities_html5,
+ compat_http_client,
+ compat_kwargs,
+ compat_os_name,
+ compat_parse_qs,
+ compat_shlex_quote,
+ compat_socket_create_connection,
+ compat_str,
+ compat_struct_pack,
+ compat_struct_unpack,
+ compat_urllib_error,
+ compat_urllib_parse,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xpath,
+)
+
+from .socks import (
+ ProxyType,
+ sockssocket,
+)
+
+
+def register_socks_protocols():
+ # "Register" SOCKS protocols
+ # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+ # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+ for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if scheme not in compat_urlparse.uses_netloc:
+ compat_urlparse.uses_netloc.append(scheme)
+
+
+# This is not clearly defined otherwise
+compiled_regex_type = type(re.compile(''))
+
+std_headers = {
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'Accept-Language': 'en-us,en;q=0.5',
+}
+
+
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
+NO_DEFAULT = object()
+
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
+KNOWN_EXTENSIONS = (
+ 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+ 'flv', 'f4v', 'f4a', 'f4b',
+ 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+ 'mkv', 'mka', 'mk3d',
+ 'avi', 'divx',
+ 'mov',
+ 'asf', 'wmv', 'wma',
+ '3gp', '3g2',
+ 'mp3',
+ 'flac',
+ 'ape',
+ 'wav',
+ 'f4f', 'f4m', 'm3u8', 'smil')
+
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+
+DATE_FORMATS = (
+ '%d %B %Y',
+ '%d %b %Y',
+ '%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %dth %Y',
+ '%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %dth %Y',
+ '%b %dst %Y %I:%M',
+ '%b %dnd %Y %I:%M',
+ '%b %dth %Y %I:%M',
+ '%Y %m %d',
+ '%Y-%m-%d',
+ '%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
+ '%Y/%m/%d %H:%M:%S',
+ '%Y-%m-%d %H:%M',
+ '%Y-%m-%d %H:%M:%S',
+ '%Y-%m-%d %H:%M:%S.%f',
+ '%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ '%Y-%m-%dT%H:%M:%S.%fZ',
+ '%Y-%m-%dT%H:%M:%S.%f0Z',
+ '%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
+ '%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
+ '%B %d %Y at %H:%M',
+ '%B %d %Y at %H:%M:%S',
+)
+
+DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_DAY_FIRST.extend([
+ '%d-%m-%Y',
+ '%d.%m.%Y',
+ '%d.%m.%y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
+ '%d/%m/%Y %H:%M:%S',
+])
+
+DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_MONTH_FIRST.extend([
+ '%m-%d-%Y',
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
+ '%m/%d/%Y %H:%M:%S',
+])
+
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+
+
+def preferredencoding():
+ """Get preferred encoding.
+
+ Returns the best encoding scheme for the system, based on
+ locale.getpreferredencoding() and some further tweaks.
+ """
+ try:
+ pref = locale.getpreferredencoding()
+ 'TEST'.encode(pref)
+ except Exception:
+ pref = 'UTF-8'
+
+ return pref
+
+
+def write_json_file(obj, fn):
+ """ Encode obj as JSON and write it to fn, atomically if possible """
+
+ fn = encodeFilename(fn)
+ if sys.version_info < (3, 0) and sys.platform != 'win32':
+ encoding = get_filesystem_encoding()
+ # os.path.basename returns a bytes object, but NamedTemporaryFile
+ # will fail if the filename contains non ascii characters unless we
+ # use a unicode object
+ path_basename = lambda f: os.path.basename(fn).decode(encoding)
+ # the same for os.path.dirname
+ path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+ else:
+ path_basename = os.path.basename
+ path_dirname = os.path.dirname
+
+ args = {
+ 'suffix': '.tmp',
+ 'prefix': path_basename(fn) + '.',
+ 'dir': path_dirname(fn),
+ 'delete': False,
+ }
+
+ # In Python 2.x, json.dump expects a bytestream.
+ # In Python 3.x, it writes to a character stream
+ if sys.version_info < (3, 0):
+ args['mode'] = 'wb'
+ else:
+ args.update({
+ 'mode': 'w',
+ 'encoding': 'utf-8',
+ })
+
+ tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
+
+ try:
+ with tf:
+ json.dump(obj, tf)
+ if sys.platform == 'win32':
+ # Need to remove existing file on Windows, else os.rename raises
+ # WindowsError or FileExistsError.
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
+ os.rename(tf.name, fn)
+ except Exception:
+ try:
+ os.remove(tf.name)
+ except OSError:
+ pass
+ raise
+
+
+if sys.version_info >= (2, 7):
+ def find_xpath_attr(node, xpath, key, val=None):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
+ return node.find(expr)
+else:
+ def find_xpath_attr(node, xpath, key, val=None):
+ for f in node.findall(compat_xpath(xpath)):
+ if key not in f.attrib:
+ continue
+ if val is None or f.attrib.get(key) == val:
+ return f
+ return None
+
+# On python2.6 the xml.etree.ElementTree.Element methods don't support
+# the namespace parameter
+
+
+def xpath_with_ns(path, ns_map):
+ components = [c.split(':') for c in path.split('/')]
+ replaced = []
+ for c in components:
+ if len(c) == 1:
+ replaced.append(c[0])
+ else:
+ ns, tag = c
+ replaced.append('{%s}%s' % (ns_map[ns], tag))
+ return '/'.join(replaced)
+
+
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ def _find_xpath(xpath):
+ return node.find(compat_xpath(xpath))
+
+ if isinstance(xpath, (str, compat_str)):
+ n = _find_xpath(xpath)
+ else:
+ for xp in xpath:
+ n = _find_xpath(xp)
+ if n is not None:
+ break
+
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element %s' % name)
+ else:
+ return None
+ return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+ if n is None or n == default:
+ return n
+ if n.text is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element\'s text %s' % name)
+ else:
+ return None
+ return n.text
+
+
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+ n = find_xpath_attr(node, xpath, key)
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = '%s[@%s]' % (xpath, key) if name is None else name
+ raise ExtractorError('Could not find XML attribute %s' % name)
+ else:
+ return None
+ return n.attrib[key]
+
+
+def get_element_by_id(id, html):
+ """Return the content of the tag with the specified ID in the passed HTML document"""
+ return get_element_by_attribute('id', id, html)
+
+
+def get_element_by_class(class_name, html):
+ """Return the content of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_by_class(class_name, html)
+ return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+ retval = get_elements_by_attribute(attribute, value, html, escape_value)
+ return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html):
+ """Return the content of all tags with the specified class in the passed HTML document as a list"""
+ return get_elements_by_attribute(
+ 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
+
+
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+
+ value = re.escape(value) if escape_value else value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
+ <([a-zA-Z0-9:._-]+)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s+%s=['"]?%s['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (re.escape(attribute), value), html):
+ res = m.group('content')
+
+ if res.startswith('"') or res.startswith("'"):
+ res = res[1:-1]
+
+ retlist.append(unescapeHTML(res))
+
+ return retlist
+
+
+class HTMLAttributeParser(compat_HTMLParser):
+ """Trivial HTML parser to gather the attributes for a single element"""
+ def __init__(self):
+ self.attrs = {}
+ compat_HTMLParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+ """Given a string for an HTML element such as
+ <el
+ a="foo" B="bar" c="&98;az" d=boz
+ empty= noval entity="&amp;"
+ sq='"' dq="'"
+ >
+ Decode and return a dictionary of attributes.
+ {
+ 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
+ 'empty': '', 'noval': None, 'entity': '&',
+ 'sq': '"', 'dq': '\''
+ }.
+ NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+ but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+ """
+ parser = HTMLAttributeParser()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
+ return parser.attrs
+
+
+def clean_html(html):
+ """Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
+ # Newline vs <br />
+ html = html.replace('\n', ' ')
+ html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+ # Strip html tags
+ html = re.sub('<.*?>', '', html)
+ # Replace html entities
+ html = unescapeHTML(html)
+ return html.strip()
+
+
+def sanitize_open(filename, open_mode):
+ """Try to open the given filename, and slightly tweak it if this fails.
+
+ Attempts to open the given filename. If this fails, it tries to change
+ the filename slightly, step by step, until it's either able to open it
+ or it fails and raises a final exception, like the standard open()
+ function.
+
+ It returns the tuple (stream, definitive_file_name).
+ """
+ try:
+ if filename == '-':
+ if sys.platform == 'win32':
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, filename)
+ except (IOError, OSError) as err:
+ if err.errno in (errno.EACCES,):
+ raise
+
+ # In case of error, try to remove win32 forbidden chars
+ alt_filename = sanitize_path(filename)
+ if alt_filename == filename:
+ raise
+ else:
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(alt_filename), open_mode)
+ return (stream, alt_filename)
+
+
+def timeconvert(timestr):
+ """Convert RFC 2822 defined time string into system timestamp"""
+ timestamp = None
+ timetuple = email.utils.parsedate_tz(timestr)
+ if timetuple is not None:
+ timestamp = email.utils.mktime_tz(timetuple)
+ return timestamp
+
+
+def sanitize_filename(s, restricted=False, is_id=False):
+ """Sanitizes a string so it could be used as part of a filename.
+ If restricted is set, use a stricter subset of allowed characters.
+ Set is_id if this is not an arbitrary string, but an ID that should be kept
+ if possible.
+ """
+ def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
+ if char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '' if restricted else '\''
+ elif char == ':':
+ return '_-' if restricted else ' -'
+ elif char in '\\/|*<>':
+ return '_'
+ if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
+ return '_'
+ if restricted and ord(char) > 127:
+ return '_'
+ return char
+
+ # Handle timestamps
+ s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+ result = ''.join(map(replace_insane, s))
+ if not is_id:
+ while '__' in result:
+ result = result.replace('__', '_')
+ result = result.strip('_')
+ # Common case of "Foreign band name - English song title"
+ if restricted and result.startswith('-_'):
+ result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
+ result = result.lstrip('.')
+ if not result:
+ result = '_'
+ return result
+
+
+def sanitize_path(s):
+ """Sanitizes and normalizes path on Windows"""
+ if sys.platform != 'win32':
+ return s
+ drive_or_unc, _ = os.path.splitdrive(s)
+ if sys.version_info < (2, 7) and not drive_or_unc:
+ drive_or_unc, _ = os.path.splitunc(s)
+ norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+ if drive_or_unc:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
+ for path_part in norm_path]
+ if drive_or_unc:
+ sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ return os.path.join(*sanitized_path)
+
+
+def sanitize_url(url):
+ # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+ # the number of unwanted failures due to missing protocol
+ if url.startswith('//'):
+ return 'http:%s' % url
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/rg3/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
+
+
+def sanitized_Request(url, *args, **kwargs):
+ return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+
+
+def expand_path(s):
+ """Expand shell variables and ~"""
+ return os.path.expandvars(compat_expanduser(s))
+
+
+def orderedSet(iterable):
+ """ Remove all duplicates from the input iterable """
+ res = []
+ for el in iterable:
+ if el not in res:
+ res.append(el)
+ return res
+
+
+def _htmlentity_transform(entity_with_semicolon):
+ """Transforms an HTML entity to a character."""
+ entity = entity_with_semicolon[:-1]
+
+ # Known non-numeric HTML entity
+ if entity in compat_html_entities.name2codepoint:
+ return compat_chr(compat_html_entities.name2codepoint[entity])
+
+ # TODO: HTML5 allows entities without a semicolon. For example,
+ # '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in compat_html_entities_html5:
+ return compat_html_entities_html5[entity_with_semicolon]
+
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
+ if mobj is not None:
+ numstr = mobj.group(1)
+ if numstr.startswith('x'):
+ base = 16
+ numstr = '0%s' % numstr
+ else:
+ base = 10
+ # See https://github.com/rg3/youtube-dl/issues/7518
+ try:
+ return compat_chr(int(numstr, base))
+ except ValueError:
+ pass
+
+ # Unknown entity in name, return its literal representation
+ return '&%s;' % entity
+
+
+def unescapeHTML(s):
+ if s is None:
+ return None
+ assert type(s) == compat_str
+
+ return re.sub(
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+
+
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
+def encodeFilename(s, for_subprocess=False):
+ """
+ @param s The name of the file
+ """
+
+ assert type(s) == compat_str
+
+ # Python 3 has a Unicode API
+ if sys.version_info >= (3, 0):
+ return s
+
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+ if sys.platform.startswith('java'):
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+ if sys.version_info >= (3, 0):
+ return b
+
+ if not isinstance(b, bytes):
+ return b
+
+ return b.decode(get_subprocess_encoding(), 'ignore')
+
+
+def encodeArgument(s):
+ if not isinstance(s, compat_str):
+ # Legacy code that uses byte strings
+ # Uncomment the following line after fixing all post processors
+ # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ s = s.decode('ascii')
+ return encodeFilename(s, True)
+
+
+def decodeArgument(b):
+ return decodeFilename(b, True)
+
+
+def decodeOption(optval):
+ if optval is None:
+ return optval
+ if isinstance(optval, bytes):
+ optval = optval.decode(preferredencoding())
+
+ assert isinstance(optval, compat_str)
+ return optval
+
+
+def formatSeconds(secs):
+ if secs > 3600:
+ return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
+ elif secs > 60:
+ return '%d:%02d' % (secs // 60, secs % 60)
+ else:
+ return '%d' % secs
+
+
+def make_HTTPS_handler(params, **kwargs):
+ opts_no_check_certificate = params.get('nocheckcertificate', False)
+ if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
+ context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+ if opts_no_check_certificate:
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
+ try:
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+ except TypeError:
+ # Python 2.7.8
+ # (create_default_context present but HTTPSHandler has no context=)
+ pass
+
+ if sys.version_info < (3, 2):
+ return YoutubeDLHTTPSHandler(params, **kwargs)
+ else: # Python < 3.4
+ context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context.verify_mode = (ssl.CERT_NONE
+ if opts_no_check_certificate
+ else ssl.CERT_REQUIRED)
+ context.set_default_verify_paths()
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+
+
+def bug_reports_message():
+ if ytdl_is_updateable():
+ update_cmd = 'type youtube-dl -U to update'
+ else:
+ update_cmd = 'see https://yt-dl.org/update on how to update'
+ msg = '; please report this issue on https://yt-dl.org/bug .'
+ msg += ' Make sure you are using the latest version; %s.' % update_cmd
+ msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+ return msg
+
+
+class YoutubeDLError(Exception):
+ """Base exception for YoutubeDL errors."""
+ pass
+
+
+class ExtractorError(YoutubeDLError):
+ """Error during info extraction."""
+
+ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
+ """
+
+ if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ expected = True
+ if video_id is not None:
+ msg = video_id + ': ' + msg
+ if cause:
+ msg += ' (caused by %r)' % cause
+ if not expected:
+ msg += bug_reports_message()
+ super(ExtractorError, self).__init__(msg)
+
+ self.traceback = tb
+ self.exc_info = sys.exc_info() # preserve original exception
+ self.cause = cause
+ self.video_id = video_id
+
+ def format_traceback(self):
+ if self.traceback is None:
+ return None
+ return ''.join(traceback.format_tb(self.traceback))
+
+
+class UnsupportedError(ExtractorError):
+ def __init__(self, url):
+ super(UnsupportedError, self).__init__(
+ 'Unsupported URL: %s' % url, expected=True)
+ self.url = url
+
+
+class RegexNotFoundError(ExtractorError):
+ """Error when a regex didn't match"""
+ pass
+
+
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+ def __init__(self, msg, countries=None):
+ super(GeoRestrictedError, self).__init__(msg, expected=True)
+ self.msg = msg
+ self.countries = countries
+
+
+class DownloadError(YoutubeDLError):
+ """Download Error exception.
+
+ This exception may be thrown by FileDownloader objects if they are not
+ configured to continue on errors. They will contain the appropriate
+ error message.
+ """
+
+ def __init__(self, msg, exc_info=None):
+ """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+ super(DownloadError, self).__init__(msg)
+ self.exc_info = exc_info
+
+
+class SameFileError(YoutubeDLError):
+ """Same File exception.
+
+ This exception will be thrown by FileDownloader objects if they detect
+ multiple files would have to be downloaded to the same file on disk.
+ """
+ pass
+
+
+class PostProcessingError(YoutubeDLError):
+ """Post Processing exception.
+
+ This exception may be raised by PostProcessor's .run() method to
+ indicate an error in the postprocessing task.
+ """
+
+ def __init__(self, msg):
+ super(PostProcessingError, self).__init__(msg)
+ self.msg = msg
+
+
+class MaxDownloadsReached(YoutubeDLError):
+ """ --max-downloads limit has been reached. """
+ pass
+
+
+class UnavailableVideoError(YoutubeDLError):
+ """Unavailable Format exception.
+
+ This exception will be thrown when a video is requested
+ in a format that is not available for that video.
+ """
+ pass
+
+
+class ContentTooShortError(YoutubeDLError):
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+
+ def __init__(self, downloaded, expected):
+ super(ContentTooShortError, self).__init__(
+ 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+ )
+ # Both in bytes
+ self.downloaded = downloaded
+ self.expected = expected
+
+
+class XAttrMetadataError(YoutubeDLError):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+ self.msg = msg
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+ 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(YoutubeDLError):
+ pass
+
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+ # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
+ # expected HTTP responses to meet HTTP/1.0 or later (see also
+ # https://github.com/rg3/youtube-dl/issues/6727)
+ if sys.version_info < (3, 0):
+ kwargs['strict'] = True
+ hc = http_class(*args, **compat_kwargs(kwargs))
+ source_address = ydl_handler._params.get('source_address')
+ if source_address is not None:
+ sa = (source_address, 0)
+ if hasattr(hc, 'source_address'): # Python 2.7+
+ hc.source_address = sa
+ else: # Python 2.6
+ def _hc_connect(self, *args, **kwargs):
+ sock = compat_socket_create_connection(
+ (self.host, self.port), self.timeout, sa)
+ if is_https:
+ self.sock = ssl.wrap_socket(
+ sock, self.key_file, self.cert_file,
+ ssl_version=ssl.PROTOCOL_TLSv1)
+ else:
+ self.sock = sock
+ hc.connect = functools.partial(_hc_connect, hc)
+
+ return hc
+
+
+def handle_youtubedl_headers(headers):
+ filtered_headers = headers
+
+ if 'Youtubedl-no-compression' in filtered_headers:
+ filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+ del filtered_headers['Youtubedl-no-compression']
+
+ return filtered_headers
+
+
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+ """Handler for HTTP requests and responses.
+
+ This class, when installed with an OpenerDirector, automatically adds
+ the standard headers to every HTTP request and handles gzipped and
+ deflated responses from web servers. If compression is to be avoided in
+ a particular request, the original request in the program code only has
+ to include the HTTP header "Youtubedl-no-compression", which will be
+ removed before making the real request.
+
+ Part of this code was copied from:
+
+ http://techknack.net/python-urllib2-handlers/
+
+ Andrew Rowls, the author of that code, agreed to release it to the
+ public domain.
+ """
+
+ def __init__(self, params, *args, **kwargs):
+ compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ self._params = params
+
+ def http_open(self, req):
+ conn_class = compat_http_client.HTTPConnection
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, False),
+ req)
+
+ @staticmethod
+ def deflate(data):
+ try:
+ return zlib.decompress(data, -zlib.MAX_WBITS)
+ except zlib.error:
+ return zlib.decompress(data)
+
+ def http_request(self, req):
+ # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+ # always respected by websites, some tend to give out URLs with non percent-encoded
+ # non-ASCII characters (see telemb.py, ard.py [#3412])
+ # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+ # To work around aforementioned issue we will replace request's original URL with
+ # percent-encoded one
+ # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+ # the code of this workaround has been moved here from YoutubeDL.urlopen()
+ url = req.get_full_url()
+ url_escaped = escape_url(url)
+
+ # Substitute URL if any change after escaping
+ if url != url_escaped:
+ req = update_Request(req, url=url_escaped)
+
+ for h, v in std_headers.items():
+ # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+ # The dict keys are capitalized because of this bug by urllib
+ if h.capitalize() not in req.headers:
+ req.add_header(h, v)
+
+ req.headers = handle_youtubedl_headers(req.headers)
+
+ if sys.version_info < (2, 7) and '#' in req.get_full_url():
+ # Python 2.6 is brain-dead when it comes to fragments
+ req._Request__original = req._Request__original.partition('#')[0]
+ req._Request__r_type = req._Request__r_type.partition('#')[0]
+
+ return req
+
+ def http_response(self, req, resp):
+ old_resp = resp
+ # gzip
+ if resp.headers.get('Content-encoding', '') == 'gzip':
+ content = resp.read()
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+ try:
+ uncompressed = io.BytesIO(gz.read())
+ except IOError as original_ioerror:
+ # There may be junk add the end of the file
+ # See http://stackoverflow.com/q/4928560/35070 for details
+ for i in range(1, 1024):
+ try:
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+ uncompressed = io.BytesIO(gz.read())
+ except IOError:
+ continue
+ break
+ else:
+ raise original_ioerror
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # deflate
+ if resp.headers.get('Content-encoding', '') == 'deflate':
+ gz = io.BytesIO(self.deflate(resp.read()))
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+ # https://github.com/rg3/youtube-dl/issues/6457).
+ if 300 <= resp.code < 400:
+ location = resp.headers.get('Location')
+ if location:
+ # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
+ if sys.version_info >= (3, 0):
+ location = location.encode('iso-8859-1').decode('utf-8')
+ else:
+ location = location.decode('utf-8')
+ location_escaped = escape_url(location)
+ if location != location_escaped:
+ del resp.headers['Location']
+ if sys.version_info < (3, 0):
+ location_escaped = location_escaped.encode('utf-8')
+ resp.headers['Location'] = location_escaped
+ return resp
+
+ https_request = http_request
+ https_response = http_response
+
+
+def make_socks_conn_class(base_class, socks_proxy):
+ assert issubclass(base_class, (
+ compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+
+ url_components = compat_urlparse.urlparse(socks_proxy)
+ if url_components.scheme.lower() == 'socks5':
+ socks_type = ProxyType.SOCKS5
+ elif url_components.scheme.lower() in ('socks', 'socks4'):
+ socks_type = ProxyType.SOCKS4
+ elif url_components.scheme.lower() == 'socks4a':
+ socks_type = ProxyType.SOCKS4A
+
+ def unquote_if_non_empty(s):
+ if not s:
+ return s
+ return compat_urllib_parse_unquote_plus(s)
+
+ proxy_args = (
+ socks_type,
+ url_components.hostname, url_components.port or 1080,
+ True, # Remote DNS
+ unquote_if_non_empty(url_components.username),
+ unquote_if_non_empty(url_components.password),
+ )
+
+ class SocksConnection(base_class):
+ def connect(self):
+ self.sock = sockssocket()
+ self.sock.setproxy(*proxy_args)
+ if type(self.timeout) in (int, float):
+ self.sock.settimeout(self.timeout)
+ self.sock.connect((self.host, self.port))
+
+ if isinstance(self, compat_http_client.HTTPSConnection):
+ if hasattr(self, '_context'): # Python > 2.6
+ self.sock = self._context.wrap_socket(
+ self.sock, server_hostname=self.host)
+ else:
+ self.sock = ssl.wrap_socket(self.sock)
+
+ return SocksConnection
+
+
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+ def __init__(self, params, https_conn_class=None, *args, **kwargs):
+ compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ self._params = params
+
+ def https_open(self, req):
+ kwargs = {}
+ conn_class = self._https_conn_class
+
+ if hasattr(self, '_context'): # python > 2.6
+ kwargs['context'] = self._context
+ if hasattr(self, '_check_hostname'): # python 3.x
+ kwargs['check_hostname'] = self._check_hostname
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, True),
+ req, **kwargs)
+
+
+class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+ def __init__(self, cookiejar=None):
+ compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+
+ def http_response(self, request, response):
+ # Python 2 will choke on next HTTP request in row if there are non-ASCII
+ # characters in Set-Cookie HTTP header of last response (see
+ # https://github.com/rg3/youtube-dl/issues/6769).
+ # In order to at least prevent crashing we will percent encode Set-Cookie
+ # header before HTTPCookieProcessor starts processing it.
+ # if sys.version_info < (3, 0) and response.headers:
+ # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
+ # set_cookie = response.headers.get(set_cookie_header)
+ # if set_cookie:
+ # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
+ # if set_cookie != set_cookie_escaped:
+ # del response.headers[set_cookie_header]
+ # response.headers[set_cookie_header] = set_cookie_escaped
+ return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+
+ https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_response = http_response
+
+
+def extract_timezone(date_str):
+ m = re.search(
+ r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
+ timezone = datetime.timedelta()
+ else:
+ date_str = date_str[:-len(m.group('tz'))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
+ return timezone, date_str
+
+
+def parse_iso8601(date_str, delimiter='T', timezone=None):
+ """ Return a UNIX timestamp from the given date """
+
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'\.[0-9]+', '', date_str)
+
+ if timezone is None:
+ timezone, date_str = extract_timezone(date_str)
+
+ try:
+ date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+
+
+def date_formats(day_first=True):
+ return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
+
+
+def unified_strdate(date_str, day_first=True):
+ """Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+ upload_date = None
+ # Replace commas
+ date_str = date_str.replace(',', ' ')
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ _, date_str = extract_timezone(date_str)
+
+ for expression in date_formats(day_first):
+ try:
+ upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is None:
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ try:
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is not None:
+ return compat_str(upload_date)
+
+
+def unified_timestamp(date_str, day_first=True):
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'[,|]', '', date_str)
+
+ pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
+ timezone, date_str = extract_timezone(date_str)
+
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
+ # Remove unrecognized timezones from ISO 8601 alike timestamps
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ if m:
+ date_str = date_str[:-len(m.group('tz'))]
+
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
+ for expression in date_formats(day_first):
+ try:
+ dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ return calendar.timegm(timetuple) + pm_delta * 3600
+
+
+def determine_ext(url, default_ext='unknown_video'):
+ if url is None or '.' not in url:
+ return default_ext
+ guess = url.partition('?')[0].rpartition('.')[2]
+ if re.match(r'^[A-Za-z0-9]+$', guess):
+ return guess
+ # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
+ return guess.rstrip('/')
+ else:
+ return default_ext
+
+
+def subtitles_filename(filename, sub_lang, sub_format):
+ return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+
+
+def date_from_str(date_str):
+ """
+ Return a datetime object from a string in the format YYYYMMDD or
+ (now|today)[+-][0-9](day|week|month|year)(s)?"""
+ today = datetime.date.today()
+ if date_str in ('now', 'today'):
+ return today
+ if date_str == 'yesterday':
+ return today - datetime.timedelta(days=1)
+ match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ if match is not None:
+ sign = match.group('sign')
+ time = int(match.group('time'))
+ if sign == '-':
+ time = -time
+ unit = match.group('unit')
+ # A bad approximation?
+ if unit == 'month':
+ unit = 'day'
+ time *= 30
+ elif unit == 'year':
+ unit = 'day'
+ time *= 365
+ unit += 's'
+ delta = datetime.timedelta(**{unit: time})
+ return today + delta
+ return datetime.datetime.strptime(date_str, '%Y%m%d').date()
+
+
+def hyphenate_date(date_str):
+ """
+ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
+ match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
+ if match is not None:
+ return '-'.join(match.groups())
+ else:
+ return date_str
+
+
+class DateRange(object):
+ """Represents a time interval between two dates"""
+
+ def __init__(self, start=None, end=None):
+ """start and end must be strings in the format accepted by date"""
+ if start is not None:
+ self.start = date_from_str(start)
+ else:
+ self.start = datetime.datetime.min.date()
+ if end is not None:
+ self.end = date_from_str(end)
+ else:
+ self.end = datetime.datetime.max.date()
+ if self.start > self.end:
+ raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
+ @classmethod
+ def day(cls, day):
+ """Returns a range that only contains the given day"""
+ return cls(day, day)
+
+ def __contains__(self, date):
+ """Check if the date is in the range"""
+ if not isinstance(date, datetime.date):
+ date = date_from_str(date)
+ return self.start <= date <= self.end
+
+ def __str__(self):
+ return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+ """ Returns the platform name as a compat_str """
+ res = platform.platform()
+ if isinstance(res, bytes):
+ res = res.decode(preferredencoding())
+
+ assert isinstance(res, compat_str)
+ return res
+
+
+def _windows_write_string(s, out):
+ """ Returns True if the string was written using special methods,
+ False if it has yet to be written out."""
+ # Adapted from http://stackoverflow.com/a/3259271/35070
+
+ import ctypes
+ import ctypes.wintypes
+
+ WIN_OUTPUT_IDS = {
+ 1: -11,
+ 2: -12,
+ }
+
+ try:
+ fileno = out.fileno()
+ except AttributeError:
+ # If the output stream doesn't have a fileno, it's virtual
+ return False
+ except io.UnsupportedOperation:
+ # Some strange Windows pseudo files?
+ return False
+ if fileno not in WIN_OUTPUT_IDS:
+ return False
+
+ GetStdHandle = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+ ('GetStdHandle', ctypes.windll.kernel32))
+ h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+ WriteConsoleW = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+ ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
+ written = ctypes.wintypes.DWORD(0)
+
+ GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
+ FILE_TYPE_CHAR = 0x0002
+ FILE_TYPE_REMOTE = 0x8000
+ GetConsoleMode = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+ ctypes.POINTER(ctypes.wintypes.DWORD))(
+ ('GetConsoleMode', ctypes.windll.kernel32))
+ INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+ def not_a_console(handle):
+ if handle == INVALID_HANDLE_VALUE or handle is None:
+ return True
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+ GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+ if not_a_console(h):
+ return False
+
+ def next_nonbmp_pos(s):
+ try:
+ return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
+ except StopIteration:
+ return len(s)
+
+ while s:
+ count = min(next_nonbmp_pos(s), 1024)
+
+ ret = WriteConsoleW(
+ h, s, count if count else 2, ctypes.byref(written), None)
+ if ret == 0:
+ raise OSError('Failed to write string')
+ if not count: # We just wrote a non-BMP character
+ assert written.value == 2
+ s = s[1:]
+ else:
+ assert written.value > 0
+ s = s[written.value:]
+ return True
+
+
+def write_string(s, out=None, encoding=None):
+ if out is None:
+ out = sys.stderr
+ assert type(s) == compat_str
+
+ if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
+ if _windows_write_string(s, out):
+ return
+
+ if ('b' in getattr(out, 'mode', '') or
+ sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ byt = s.encode(encoding or preferredencoding(), 'ignore')
+ out.write(byt)
+ elif hasattr(out, 'buffer'):
+ enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+ byt = s.encode(enc, 'ignore')
+ out.buffer.write(byt)
+ else:
+ out.write(s)
+ out.flush()
+
+
+def bytes_to_intlist(bs):
+ if not bs:
+ return []
+ if isinstance(bs[0], int): # Python 3
+ return list(bs)
+ else:
+ return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b''
+ return compat_struct_pack('%dB' % len(xs), *xs)
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+ import ctypes.wintypes
+ import msvcrt
+
+ class OVERLAPPED(ctypes.Structure):
+ _fields_ = [
+ ('Internal', ctypes.wintypes.LPVOID),
+ ('InternalHigh', ctypes.wintypes.LPVOID),
+ ('Offset', ctypes.wintypes.DWORD),
+ ('OffsetHigh', ctypes.wintypes.DWORD),
+ ('hEvent', ctypes.wintypes.HANDLE),
+ ]
+
+ kernel32 = ctypes.windll.kernel32
+ LockFileEx = kernel32.LockFileEx
+ LockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwFlags
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ LockFileEx.restype = ctypes.wintypes.BOOL
+ UnlockFileEx = kernel32.UnlockFileEx
+ UnlockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ UnlockFileEx.restype = ctypes.wintypes.BOOL
+ whole_low = 0xffffffff
+ whole_high = 0x7fffffff
+
+ def _lock_file(f, exclusive):
+ overlapped = OVERLAPPED()
+ overlapped.Offset = 0
+ overlapped.OffsetHigh = 0
+ overlapped.hEvent = 0
+ f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+ def _unlock_file(f):
+ assert f._lock_file_overlapped_p
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not UnlockFileEx(handle, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+ # Some platforms, such as Jython, is missing fcntl
+ try:
+ import fcntl
+
+ def _lock_file(f, exclusive):
+ fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+ def _unlock_file(f):
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except ImportError:
+ UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+ def _lock_file(f, exclusive):
+ raise IOError(UNSUPPORTED_MSG)
+
+ def _unlock_file(f):
+ raise IOError(UNSUPPORTED_MSG)
+
+
+class locked_file(object):
+ def __init__(self, filename, mode, encoding=None):
+ assert mode in ['r', 'a', 'w']
+ self.f = io.open(filename, mode, encoding=encoding)
+ self.mode = mode
+
+ def __enter__(self):
+ exclusive = self.mode != 'r'
+ try:
+ _lock_file(self.f, exclusive)
+ except IOError:
+ self.f.close()
+ raise
+ return self
+
+ def __exit__(self, etype, value, traceback):
+ try:
+ _unlock_file(self.f)
+ finally:
+ self.f.close()
+
+ def __iter__(self):
+ return iter(self.f)
+
+ def write(self, *args):
+ return self.f.write(*args)
+
+ def read(self, *args):
+ return self.f.read(*args)
+
+
+def get_filesystem_encoding():
+ encoding = sys.getfilesystemencoding()
+ return encoding if encoding is not None else 'utf-8'
+
+
+def shell_quote(args):
+ quoted_args = []
+ encoding = get_filesystem_encoding()
+ for a in args:
+ if isinstance(a, bytes):
+ # We may get a filename encoded with 'encodeFilename'
+ a = a.decode(encoding)
+ quoted_args.append(compat_shlex_quote(a))
+ return ' '.join(quoted_args)
+
+
+def smuggle_url(url, data):
+ """ Pass additional data in a URL for internal use. """
+
+ url, idata = unsmuggle_url(url, {})
+ data.update(idata)
+ sdata = compat_urllib_parse_urlencode(
+ {'__youtubedl_smuggle': json.dumps(data)})
+ return url + '#' + sdata
+
+
+def unsmuggle_url(smug_url, default=None):
+ if '#__youtubedl_smuggle' not in smug_url:
+ return smug_url, default
+ url, _, sdata = smug_url.rpartition('#')
+ jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ data = json.loads(jsond)
+ return url, data
+
+
+def format_bytes(bytes):
+ if bytes is None:
+ return 'N/A'
+ if type(bytes) is str:
+ bytes = float(bytes)
+ if bytes == 0.0:
+ exponent = 0
+ else:
+ exponent = int(math.log(bytes, 1024.0))
+ suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
+ converted = float(bytes) / float(1024 ** exponent)
+ return '%.2f%s' % (converted, suffix)
+
+
+def lookup_unit_table(unit_table, s):
+ units_re = '|'.join(re.escape(u) for u in unit_table)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+ if not m:
+ return None
+ num_str = m.group('num').replace(',', '.')
+ mult = unit_table[m.group('unit')]
+ return int(float(num_str) * mult)
+
+
+def parse_filesize(s):
+ if s is None:
+ return None
+
+ # The lower-case forms are of course incorrect and unofficial,
+ # but we support those too
+ _UNIT_TABLE = {
+ 'B': 1,
+ 'b': 1,
+ 'bytes': 1,
+ 'KiB': 1024,
+ 'KB': 1000,
+ 'kB': 1024,
+ 'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
+ 'MiB': 1024 ** 2,
+ 'MB': 1000 ** 2,
+ 'mB': 1024 ** 2,
+ 'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
+ 'GiB': 1024 ** 3,
+ 'GB': 1000 ** 3,
+ 'gB': 1024 ** 3,
+ 'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
+ 'TiB': 1024 ** 4,
+ 'TB': 1000 ** 4,
+ 'tB': 1024 ** 4,
+ 'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
+ 'PiB': 1024 ** 5,
+ 'PB': 1000 ** 5,
+ 'pB': 1024 ** 5,
+ 'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
+ 'EiB': 1024 ** 6,
+ 'EB': 1000 ** 6,
+ 'eB': 1024 ** 6,
+ 'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
+ 'ZiB': 1024 ** 7,
+ 'ZB': 1000 ** 7,
+ 'zB': 1024 ** 7,
+ 'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
+ 'YiB': 1024 ** 8,
+ 'YB': 1000 ** 8,
+ 'yB': 1024 ** 8,
+ 'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_count(s):
+ if s is None:
+ return None
+
+ s = s.strip()
+
+ if re.match(r'^[\d,.]+$', s):
+ return str_to_int(s)
+
+ _UNIT_TABLE = {
+ 'k': 1000,
+ 'K': 1000,
+ 'm': 1000 ** 2,
+ 'M': 1000 ** 2,
+ 'kk': 1000 ** 2,
+ 'KK': 1000 ** 2,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_resolution(s):
+ if s is None:
+ return {}
+
+ mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
+
+
+def month_by_name(name, lang='en'):
+ """ Return the number of a month by (locale-independently) English name """
+
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
+ try:
+ return month_names.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
+ except ValueError:
+ return None
+
+
+def fix_xml_ampersands(xml_str):
+ """Replace all the '&' by '&amp;' in XML"""
+ return re.sub(
+ r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
+ '&amp;',
+ xml_str)
+
+
+def setproctitle(title):
+ assert isinstance(title, compat_str)
+
+ # ctypes in Jython is not complete
+ # http://bugs.jython.org/issue2148
+ if sys.platform.startswith('java'):
+ return
+
+ try:
+ libc = ctypes.cdll.LoadLibrary('libc.so.6')
+ except OSError:
+ return
+ except TypeError:
+ # LoadLibrary in Windows Python 2.7.13 only expects
+ # a bytestring, but since unicode_literals turns
+ # every string into a unicode string, it fails.
+ return
+ title_bytes = title.encode('utf-8')
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
+ try:
+ libc.prctl(15, buf, 0, 0, 0)
+ except AttributeError:
+ return # Strange libc, just skip this
+
+
+def remove_start(s, start):
+ return s[len(start):] if s is not None and s.startswith(start) else s
+
+
+def remove_end(s, end):
+ return s[:-len(end)] if s is not None and s.endswith(end) else s
+
+
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
+ return s
+
+
+def url_basename(url):
+ path = compat_urlparse.urlparse(url).path
+ return path.strip('/').split('/')[-1]
+
+
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
+def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode('utf-8')
+ if not isinstance(path, compat_str) or not path:
+ return None
+ if re.match(r'^(?:https?:)?//', path):
+ return path
+ if isinstance(base, bytes):
+ base = base.decode('utf-8')
+ if not isinstance(base, compat_str) or not re.match(
+ r'^(?:https?:)?//', base):
+ return None
+ return compat_urlparse.urljoin(base, path)
+
+
+class HEADRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'HEAD'
+
+
+class PUTRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'PUT'
+
+
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+ if get_attr:
+ if v is not None:
+ v = getattr(v, get_attr, None)
+ if v == '':
+ v = None
+ if v is None:
+ return default
+ try:
+ return int(v) * invscale // scale
+ except ValueError:
+ return default
+
+
+def str_or_none(v, default=None):
+ return default if v is None else compat_str(v)
+
+
+def str_to_int(int_str):
+ """ A more relaxed version of int_or_none """
+ if int_str is None:
+ return None
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int(int_str)
+
+
+def float_or_none(v, scale=1, invscale=1, default=None):
+ if v is None:
+ return default
+ try:
+ return float(v) * invscale / scale
+ except ValueError:
+ return default
+
+
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
+def strip_or_none(v):
+ return None if v is None else v.strip()
+
+
+def parse_duration(s):
+ if not isinstance(s, compat_basestring):
+ return None
+
+ s = s.strip()
+
+ days, hours, mins, secs, ms = [None] * 5
+ m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
+ (?:
+ (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+ )?
+ T)?
+ (?:
+ (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+ )?
+ (?:
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+ )?
+ (?:
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+ )?Z?$''', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
+ if m:
+ hours, mins = m.groups()
+ else:
+ return None
+
+ duration = 0
+ if secs:
+ duration += float(secs)
+ if mins:
+ duration += float(mins) * 60
+ if hours:
+ duration += float(hours) * 60 * 60
+ if days:
+ duration += float(days) * 24 * 60 * 60
+ if ms:
+ duration += float(ms)
+ return duration
+
+
+def prepend_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return (
+ '{0}.{1}{2}'.format(name, ext, real_ext)
+ if not expected_real_ext or real_ext[1:] == expected_real_ext
+ else '{0}.{1}'.format(filename, ext))
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return '{0}.{1}'.format(
+ name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+ ext)
+
+
+def check_executable(exe, args=[]):
+ """ Checks if the given binary is installed somewhere in PATH, and returns its name.
+ args can be a list of arguments for a short output (like -version) """
+ try:
+ subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+ except OSError:
+ return False
+ return exe
+
+
+def get_exe_version(exe, args=['--version'],
+ version_re=None, unrecognized='present'):
+ """ Returns the version of the specified executable,
+ or False if the executable is not present """
+ try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if youtube-dl is run in the background.
+ # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
+ out, _ = subprocess.Popen(
+ [encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ except OSError:
+ return False
+ if isinstance(out, bytes): # Python 2.x
+ out = out.decode('ascii', 'ignore')
+ return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+ assert isinstance(output, compat_str)
+ if version_re is None:
+ version_re = r'version\s+([-0-9._a-zA-Z]+)'
+ m = re.search(version_re, output)
+ if m:
+ return m.group(1)
+ else:
+ return unrecognized
+
+
+class PagedList(object):
+ def __len__(self):
+ # This is only useful for tests
+ return len(self.getslice())
+
+
+class OnDemandPagedList(PagedList):
+ def __init__(self, pagefunc, pagesize, use_cache=True):
+ self._pagefunc = pagefunc
+ self._pagesize = pagesize
+ self._use_cache = use_cache
+ if use_cache:
+ self._cache = {}
+
+ def getslice(self, start=0, end=None):
+ res = []
+ for pagenum in itertools.count(start // self._pagesize):
+ firstid = pagenum * self._pagesize
+ nextfirstid = pagenum * self._pagesize + self._pagesize
+ if start >= nextfirstid:
+ continue
+
+ page_results = None
+ if self._use_cache:
+ page_results = self._cache.get(pagenum)
+ if page_results is None:
+ page_results = list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
+
+ startv = (
+ start % self._pagesize
+ if firstid <= start < nextfirstid
+ else 0)
+
+ endv = (
+ ((end - 1) % self._pagesize) + 1
+ if (end is not None and firstid <= end <= nextfirstid)
+ else None)
+
+ if startv != 0 or endv is not None:
+ page_results = page_results[startv:endv]
+ res.extend(page_results)
+
+ # A little optimization - if current page is not "full", ie. does
+ # not contain page_size videos then we can assume that this page
+ # is the last one - there are no more ids on further pages -
+ # i.e. no need to query again.
+ if len(page_results) + startv < self._pagesize:
+ break
+
+ # If we got the whole page, but the next page is not interesting,
+ # break out early as well
+ if end == nextfirstid:
+ break
+ return res
+
+
+class InAdvancePagedList(PagedList):
+ def __init__(self, pagefunc, pagecount, pagesize):
+ self._pagefunc = pagefunc
+ self._pagecount = pagecount
+ self._pagesize = pagesize
+
+ def getslice(self, start=0, end=None):
+ res = []
+ start_page = start // self._pagesize
+ end_page = (
+ self._pagecount if end is None else (end // self._pagesize + 1))
+ skip_elems = start - start_page * self._pagesize
+ only_more = None if end is None else end - start
+ for pagenum in range(start_page, end_page):
+ page = list(self._pagefunc(pagenum))
+ if skip_elems:
+ page = page[skip_elems:]
+ skip_elems = None
+ if only_more is not None:
+ if len(page) < only_more:
+ only_more -= len(page)
+ else:
+ page = page[:only_more]
+ res.extend(page)
+ break
+ res.extend(page)
+ return res
+
+
+def uppercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\U[0-9a-fA-F]{8}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def lowercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def escape_rfc3986(s):
+ """Escape non-ASCII characters as suggested by RFC 3986"""
+ if sys.version_info < (3, 0) and isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+
+
+def escape_url(url):
+ """Escape URL as suggested by RFC 3986"""
+ url_parsed = compat_urllib_parse_urlparse(url)
+ return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
+ path=escape_rfc3986(url_parsed.path),
+ params=escape_rfc3986(url_parsed.params),
+ query=escape_rfc3986(url_parsed.query),
+ fragment=escape_rfc3986(url_parsed.fragment)
+ ).geturl()
+
+
+def read_batch_urls(batch_fd):
+ def fixup(url):
+ if not isinstance(url, compat_str):
+ url = url.decode('utf-8', 'replace')
+ BOM_UTF8 = '\xef\xbb\xbf'
+ if url.startswith(BOM_UTF8):
+ url = url[len(BOM_UTF8):]
+ url = url.strip()
+ if url.startswith(('#', ';', ']')):
+ return False
+ return url
+
+ with contextlib.closing(batch_fd) as fd:
+ return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+ return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+
+
+def update_url_query(url, query):
+ if not query:
+ return url
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_parse_qs(parsed_url.query)
+ qs.update(query)
+ return compat_urlparse.urlunparse(parsed_url._replace(
+ query=compat_urllib_parse_urlencode(qs, True)))
+
+
+def update_Request(req, url=None, data=None, headers={}, query={}):
+ req_headers = req.headers.copy()
+ req_headers.update(headers)
+ req_data = data or req.data
+ req_url = update_url_query(url or req.get_full_url(), query)
+ req_get_method = req.get_method()
+ if req_get_method == 'HEAD':
+ req_type = HEADRequest
+ elif req_get_method == 'PUT':
+ req_type = PUTRequest
+ else:
+ req_type = compat_urllib_request.Request
+ new_req = req_type(
+ req_url, data=req_data, headers=req_headers,
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+ if hasattr(req, 'timeout'):
+ new_req.timeout = req.timeout
+ return new_req
+
+
+def _multipart_encode_impl(data, boundary):
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ out = b''
+ for k, v in data.items():
+ out += b'--' + boundary.encode('ascii') + b'\r\n'
+ if isinstance(k, compat_str):
+ k = k.encode('utf-8')
+ if isinstance(v, compat_str):
+ v = v.encode('utf-8')
+ # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+ # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
+ if boundary.encode('ascii') in content:
+ raise ValueError('Boundary overlaps with data')
+ out += content
+
+ out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+ return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+ '''
+ Encode a dict to RFC 7578-compliant form-data
+
+ data:
+ A dict where keys and values can be either Unicode or bytes-like
+ objects.
+ boundary:
+ If specified a Unicode object, it's used as the boundary. Otherwise
+ a random boundary is generated.
+
+ Reference: https://tools.ietf.org/html/rfc7578
+ '''
+ has_specified_boundary = boundary is not None
+
+ while True:
+ if boundary is None:
+ boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+
+ try:
+ out, content_type = _multipart_encode_impl(data, boundary)
+ break
+ except ValueError:
+ if has_specified_boundary:
+ raise
+ boundary = None
+
+ return out, content_type
+
+
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+ if isinstance(key_or_keys, (list, tuple)):
+ for key in key_or_keys:
+ if key not in d or d[key] is None or skip_false_values and not d[key]:
+ continue
+ return d[key]
+ return default
+ return d.get(key_or_keys, default)
+
+
+def try_get(src, getter, expected_type=None):
+ if not isinstance(getter, (list, tuple)):
+ getter = [getter]
+ for get in getter:
+ try:
+ v = get(src)
+ except (AttributeError, KeyError, TypeError, IndexError):
+ pass
+ else:
+ if expected_type is None or isinstance(v, expected_type):
+ return v
+
+
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if v is None:
+ continue
+ if (k not in merged or
+ (isinstance(v, compat_str) and v and
+ isinstance(merged[k], compat_str) and
+ not merged[k])):
+ merged[k] = v
+ return merged
+
+
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
+
+
+TV_PARENTAL_GUIDELINES = {
+ 'TV-Y': 0,
+ 'TV-Y7': 7,
+ 'TV-G': 0,
+ 'TV-PG': 0,
+ 'TV-14': 14,
+ 'TV-MA': 17,
+}
+
+
+def parse_age_limit(s):
+ if type(s) == int:
+ return s if 0 <= s <= 21 else None
+ if not isinstance(s, compat_basestring):
+ return None
+ m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
+ if m:
+ return int(m.group('age'))
+ if s in US_RATINGS:
+ return US_RATINGS[s]
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
+
+
+def strip_jsonp(code):
+ return re.sub(
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
+
+
+def js_to_json(code):
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+ SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ INTEGER_TABLE = (
+ (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+ (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ )
+
+ def fix_kv(m):
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ elif v.startswith('/*') or v.startswith('//') or v == ',':
+ return ""
+
+ if v[0] in ("'", '"'):
+ v = re.sub(r'(?s)\\.|"', lambda m: {
+ '"': '\\"',
+ "\\'": "'",
+ '\\\n': '',
+ '\\x': '\\u00',
+ }.get(m.group(0), m.group(0)), v[1:-1])
+
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return '"%d":' % i if v.endswith(':') else '%d' % i
+
+ return '"%s"' % v
+
+ return re.sub(r'''(?sx)
+ "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+ {comment}|,(?={skip}[\]}}])|
+ (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+ [0-9]+(?={skip}:)
+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
+
+
+def qualities(quality_ids):
+ """ Get a numeric quality value out of a list of possible values """
+ def q(qid):
+ try:
+ return quality_ids.index(qid)
+ except ValueError:
+ return -1
+ return q
+
+
+DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+
+
+def limit_length(s, length):
+ """ Add ellipses to overly long strings """
+ if s is None:
+ return None
+ ELLIPSES = '...'
+ if len(s) > length:
+ return s[:length - len(ELLIPSES)] + ELLIPSES
+ return s
+
+
+def version_tuple(v):
+ return tuple(int(e) for e in re.split(r'[-.]', v))
+
+
+def is_outdated_version(version, limit, assume_new=True):
+ if not version:
+ return not assume_new
+ try:
+ return version_tuple(version) < version_tuple(limit)
+ except ValueError:
+ return not assume_new
+
+
+def ytdl_is_updateable():
+ """ Returns if youtube-dl can be updated with -U """
+ from zipimport import zipimporter
+
+ return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+ # Get a short string representation for a subprocess command
+ return ' '.join(compat_shlex_quote(a) for a in args)
+
+
+def error_to_compat_str(err):
+ err_str = str(err)
+ # On python 2 error byte string must be decoded with proper
+ # encoding rather than ascii
+ if sys.version_info[0] < 3:
+ err_str = err_str.decode(preferredencoding())
+ return err_str
+
+
+def mimetype2ext(mt):
+ if mt is None:
+ return None
+
+ ext = {
+ 'audio/mp4': 'm4a',
+ # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
+ # it's the most popular one
+ 'audio/mpeg': 'mp3',
+ }.get(mt)
+ if ext is not None:
+ return ext
+
+ _, _, res = mt.rpartition('/')
+ res = res.split(';')[0].strip().lower()
+
+ return {
+ '3gpp': '3gp',
+ 'smptett+xml': 'tt',
+ 'ttaf+xml': 'dfxp',
+ 'ttml+xml': 'ttml',
+ 'x-flv': 'flv',
+ 'x-mp4-fragmented': 'mp4',
+ 'x-ms-sami': 'sami',
+ 'x-ms-wmv': 'wmv',
+ 'mpegurl': 'm3u8',
+ 'x-mpegurl': 'm3u8',
+ 'vnd.apple.mpegurl': 'm3u8',
+ 'dash+xml': 'mpd',
+ 'f4m+xml': 'f4m',
+ 'hds+xml': 'f4m',
+ 'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
+ 'mp2t': 'ts',
+ }.get(res, res)
+
+
+def parse_codecs(codecs_str):
+ # http://tools.ietf.org/html/rfc6381
+ if not codecs_str:
+ return {}
+ splited_codecs = list(filter(None, map(
+ lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec = None, None
+ for full_codec in splited_codecs:
+ codec = full_codec.split('.')[0]
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
+ if not vcodec:
+ vcodec = full_codec
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ if not acodec:
+ acodec = full_codec
+ else:
+ write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
+ if not vcodec and not acodec:
+ if len(splited_codecs) == 2:
+ return {
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ }
+ elif len(splited_codecs) == 1:
+ return {
+ 'vcodec': 'none',
+ 'acodec': vcodec,
+ }
+ else:
+ return {
+ 'vcodec': vcodec or 'none',
+ 'acodec': acodec or 'none',
+ }
+ return {}
+
+
+def urlhandle_detect_ext(url_handle):
+ getheader = url_handle.headers.get
+
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
+ return mimetype2ext(getheader('Content-Type'))
+
+
+def encode_data_uri(data, mime_type):
+ return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
+
+
+def is_html(first_bytes):
+ """ Detect whether a file contains HTML by examining its first bytes. """
+
+ BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+ ]
+ for bom, enc in BOMS:
+ if first_bytes.startswith(bom):
+ s = first_bytes[len(bom):].decode(enc, 'replace')
+ break
+ else:
+ s = first_bytes.decode('utf-8', 'replace')
+
+ return re.match(r'^\s*<', s)
+
+
+def determine_protocol(info_dict):
+ protocol = info_dict.get('protocol')
+ if protocol is not None:
+ return protocol
+
+ url = info_dict['url']
+ if url.startswith('rtmp'):
+ return 'rtmp'
+ elif url.startswith('mms'):
+ return 'mms'
+ elif url.startswith('rtsp'):
+ return 'rtsp'
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ return 'm3u8'
+ elif ext == 'f4m':
+ return 'f4m'
+
+ return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+ """ Render a list of rows, each as a list of values """
+ table = [header_row] + data
+ max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+ format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ if (m.group('quotedstrval') is not None or
+ m.group('strval') is not None or
+ # If the original field is a string and matching comparisonvalue is
+ # a number we should respect the origin of the original field
+ # and process comparison value as a string (see
+ # https://github.com/rg3/youtube-dl/issues/11082).
+ actual_value is not None and m.group('intval') is not None and
+ isinstance(actual_value, compat_str)):
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+ quote = m.group('quote')
+ if quote is not None:
+ comparison_value = comparison_value.replace(r'\%s' % quote, quote)
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
+ return _match_func
+
+
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return
+
+ mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
+
+
+def srt_subtitles_timecode(seconds):
+ return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
+
+def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
+ LEGACY_NAMESPACES = (
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
+ ]),
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
+ ]),
+ )
+
+ SUPPORTED_STYLING = [
+ 'color',
+ 'fontFamily',
+ 'fontSize',
+ 'fontStyle',
+ 'fontWeight',
+ 'textDecoration'
+ ]
+
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
+ })
+
+ styles = {}
+ default_style = {}
+
+ class TTMLPElementParser(object):
+ _out = ''
+ _unclosed_elements = []
+ _applied_styles = []
+
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), 'br'):
+ self._out += '\n'
+ else:
+ unclosed_elements = []
+ style = {}
+ element_style_id = attrib.get('style')
+ if default_style:
+ style.update(default_style)
+ if element_style_id:
+ style.update(styles.get(element_style_id, {}))
+ for prop in SUPPORTED_STYLING:
+ prop_val = attrib.get(_x('tts:' + prop))
+ if prop_val:
+ style[prop] = prop_val
+ if style:
+ font = ''
+ for k, v in sorted(style.items()):
+ if self._applied_styles and self._applied_styles[-1].get(k) == v:
+ continue
+ if k == 'color':
+ font += ' color="%s"' % v
+ elif k == 'fontSize':
+ font += ' size="%s"' % v
+ elif k == 'fontFamily':
+ font += ' face="%s"' % v
+ elif k == 'fontWeight' and v == 'bold':
+ self._out += '<b>'
+ unclosed_elements.append('b')
+ elif k == 'fontStyle' and v == 'italic':
+ self._out += '<i>'
+ unclosed_elements.append('i')
+ elif k == 'textDecoration' and v == 'underline':
+ self._out += '<u>'
+ unclosed_elements.append('u')
+ if font:
+ self._out += '<font' + font + '>'
+ unclosed_elements.append('font')
+ applied_style = {}
+ if self._applied_styles:
+ applied_style.update(self._applied_styles[-1])
+ applied_style.update(style)
+ self._applied_styles.append(applied_style)
+ self._unclosed_elements.append(unclosed_elements)
+
+ def end(self, tag):
+ if tag not in (_x('ttml:br'), 'br'):
+ unclosed_elements = self._unclosed_elements.pop()
+ for element in reversed(unclosed_elements):
+ self._out += '</%s>' % element
+ if unclosed_elements and self._applied_styles:
+ self._applied_styles.pop()
+
+ def data(self, data):
+ self._out += data
+
+ def close(self):
+ return self._out.strip()
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
+
+ for k, v in LEGACY_NAMESPACES:
+ for ns in v:
+ dfxp_data = dfxp_data.replace(ns, k)
+
+ dfxp = compat_etree_fromstring(dfxp_data)
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
+
+ repeat = False
+ while True:
+ for style in dfxp.findall(_x('.//ttml:style')):
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
+ parent_style_id = style.get('style')
+ if parent_style_id:
+ if parent_style_id not in styles:
+ repeat = True
+ continue
+ styles[style_id] = styles[parent_style_id].copy()
+ for prop in SUPPORTED_STYLING:
+ prop_val = style.get(_x('tts:' + prop))
+ if prop_val:
+ styles.setdefault(style_id, {})[prop] = prop_val
+ if repeat:
+ repeat = False
+ else:
+ break
+
+ for p in ('body', 'div'):
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+ if ele is None:
+ continue
+ style = styles.get(ele.get('style'))
+ if not style:
+ continue
+ default_style.update(style)
+
+ for para, index in zip(paras, itertools.count(1)):
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
+ end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
+ if not end_time:
+ if not dur:
+ continue
+ end_time = begin_time + dur
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ srt_subtitles_timecode(begin_time),
+ srt_subtitles_timecode(end_time),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
+def cli_option(params, command_option, param):
+ param = params.get(param)
+ if param:
+ param = compat_str(param)
+ return [command_option, param] if param is not None else []
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+ param = params.get(param)
+ if param is None:
+ return []
+ assert isinstance(param, bool)
+ if separator:
+ return [command_option + separator + (true_value if param else false_value)]
+ return [command_option, true_value if param else false_value]
+
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+ param = params.get(param)
+ return [command_option] if param == expected_value else []
+
+
+def cli_configuration_args(params, param, default=[]):
+ ex_args = params.get(param)
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
+
+class ISO639Utils(object):
+ # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+ _lang_map = {
+ 'aa': 'aar',
+ 'ab': 'abk',
+ 'ae': 'ave',
+ 'af': 'afr',
+ 'ak': 'aka',
+ 'am': 'amh',
+ 'an': 'arg',
+ 'ar': 'ara',
+ 'as': 'asm',
+ 'av': 'ava',
+ 'ay': 'aym',
+ 'az': 'aze',
+ 'ba': 'bak',
+ 'be': 'bel',
+ 'bg': 'bul',
+ 'bh': 'bih',
+ 'bi': 'bis',
+ 'bm': 'bam',
+ 'bn': 'ben',
+ 'bo': 'bod',
+ 'br': 'bre',
+ 'bs': 'bos',
+ 'ca': 'cat',
+ 'ce': 'che',
+ 'ch': 'cha',
+ 'co': 'cos',
+ 'cr': 'cre',
+ 'cs': 'ces',
+ 'cu': 'chu',
+ 'cv': 'chv',
+ 'cy': 'cym',
+ 'da': 'dan',
+ 'de': 'deu',
+ 'dv': 'div',
+ 'dz': 'dzo',
+ 'ee': 'ewe',
+ 'el': 'ell',
+ 'en': 'eng',
+ 'eo': 'epo',
+ 'es': 'spa',
+ 'et': 'est',
+ 'eu': 'eus',
+ 'fa': 'fas',
+ 'ff': 'ful',
+ 'fi': 'fin',
+ 'fj': 'fij',
+ 'fo': 'fao',
+ 'fr': 'fra',
+ 'fy': 'fry',
+ 'ga': 'gle',
+ 'gd': 'gla',
+ 'gl': 'glg',
+ 'gn': 'grn',
+ 'gu': 'guj',
+ 'gv': 'glv',
+ 'ha': 'hau',
+ 'he': 'heb',
+ 'hi': 'hin',
+ 'ho': 'hmo',
+ 'hr': 'hrv',
+ 'ht': 'hat',
+ 'hu': 'hun',
+ 'hy': 'hye',
+ 'hz': 'her',
+ 'ia': 'ina',
+ 'id': 'ind',
+ 'ie': 'ile',
+ 'ig': 'ibo',
+ 'ii': 'iii',
+ 'ik': 'ipk',
+ 'io': 'ido',
+ 'is': 'isl',
+ 'it': 'ita',
+ 'iu': 'iku',
+ 'ja': 'jpn',
+ 'jv': 'jav',
+ 'ka': 'kat',
+ 'kg': 'kon',
+ 'ki': 'kik',
+ 'kj': 'kua',
+ 'kk': 'kaz',
+ 'kl': 'kal',
+ 'km': 'khm',
+ 'kn': 'kan',
+ 'ko': 'kor',
+ 'kr': 'kau',
+ 'ks': 'kas',
+ 'ku': 'kur',
+ 'kv': 'kom',
+ 'kw': 'cor',
+ 'ky': 'kir',
+ 'la': 'lat',
+ 'lb': 'ltz',
+ 'lg': 'lug',
+ 'li': 'lim',
+ 'ln': 'lin',
+ 'lo': 'lao',
+ 'lt': 'lit',
+ 'lu': 'lub',
+ 'lv': 'lav',
+ 'mg': 'mlg',
+ 'mh': 'mah',
+ 'mi': 'mri',
+ 'mk': 'mkd',
+ 'ml': 'mal',
+ 'mn': 'mon',
+ 'mr': 'mar',
+ 'ms': 'msa',
+ 'mt': 'mlt',
+ 'my': 'mya',
+ 'na': 'nau',
+ 'nb': 'nob',
+ 'nd': 'nde',
+ 'ne': 'nep',
+ 'ng': 'ndo',
+ 'nl': 'nld',
+ 'nn': 'nno',
+ 'no': 'nor',
+ 'nr': 'nbl',
+ 'nv': 'nav',
+ 'ny': 'nya',
+ 'oc': 'oci',
+ 'oj': 'oji',
+ 'om': 'orm',
+ 'or': 'ori',
+ 'os': 'oss',
+ 'pa': 'pan',
+ 'pi': 'pli',
+ 'pl': 'pol',
+ 'ps': 'pus',
+ 'pt': 'por',
+ 'qu': 'que',
+ 'rm': 'roh',
+ 'rn': 'run',
+ 'ro': 'ron',
+ 'ru': 'rus',
+ 'rw': 'kin',
+ 'sa': 'san',
+ 'sc': 'srd',
+ 'sd': 'snd',
+ 'se': 'sme',
+ 'sg': 'sag',
+ 'si': 'sin',
+ 'sk': 'slk',
+ 'sl': 'slv',
+ 'sm': 'smo',
+ 'sn': 'sna',
+ 'so': 'som',
+ 'sq': 'sqi',
+ 'sr': 'srp',
+ 'ss': 'ssw',
+ 'st': 'sot',
+ 'su': 'sun',
+ 'sv': 'swe',
+ 'sw': 'swa',
+ 'ta': 'tam',
+ 'te': 'tel',
+ 'tg': 'tgk',
+ 'th': 'tha',
+ 'ti': 'tir',
+ 'tk': 'tuk',
+ 'tl': 'tgl',
+ 'tn': 'tsn',
+ 'to': 'ton',
+ 'tr': 'tur',
+ 'ts': 'tso',
+ 'tt': 'tat',
+ 'tw': 'twi',
+ 'ty': 'tah',
+ 'ug': 'uig',
+ 'uk': 'ukr',
+ 'ur': 'urd',
+ 'uz': 'uzb',
+ 've': 'ven',
+ 'vi': 'vie',
+ 'vo': 'vol',
+ 'wa': 'wln',
+ 'wo': 'wol',
+ 'xh': 'xho',
+ 'yi': 'yid',
+ 'yo': 'yor',
+ 'za': 'zha',
+ 'zh': 'zho',
+ 'zu': 'zul',
+ }
+
+ @classmethod
+ def short2long(cls, code):
+ """Convert language code from ISO 639-1 to ISO 639-2/T"""
+ return cls._lang_map.get(code[:2])
+
+ @classmethod
+ def long2short(cls, code):
+ """Convert language code from ISO 639-2/T to ISO 639-1"""
+ for short_name, long_name in cls._lang_map.items():
+ if long_name == code:
+ return short_name
+
+
+class ISO3166Utils(object):
+ # From http://data.okfn.org/data/core/country-list
+ _country_map = {
+ 'AF': 'Afghanistan',
+ 'AX': 'Åland Islands',
+ 'AL': 'Albania',
+ 'DZ': 'Algeria',
+ 'AS': 'American Samoa',
+ 'AD': 'Andorra',
+ 'AO': 'Angola',
+ 'AI': 'Anguilla',
+ 'AQ': 'Antarctica',
+ 'AG': 'Antigua and Barbuda',
+ 'AR': 'Argentina',
+ 'AM': 'Armenia',
+ 'AW': 'Aruba',
+ 'AU': 'Australia',
+ 'AT': 'Austria',
+ 'AZ': 'Azerbaijan',
+ 'BS': 'Bahamas',
+ 'BH': 'Bahrain',
+ 'BD': 'Bangladesh',
+ 'BB': 'Barbados',
+ 'BY': 'Belarus',
+ 'BE': 'Belgium',
+ 'BZ': 'Belize',
+ 'BJ': 'Benin',
+ 'BM': 'Bermuda',
+ 'BT': 'Bhutan',
+ 'BO': 'Bolivia, Plurinational State of',
+ 'BQ': 'Bonaire, Sint Eustatius and Saba',
+ 'BA': 'Bosnia and Herzegovina',
+ 'BW': 'Botswana',
+ 'BV': 'Bouvet Island',
+ 'BR': 'Brazil',
+ 'IO': 'British Indian Ocean Territory',
+ 'BN': 'Brunei Darussalam',
+ 'BG': 'Bulgaria',
+ 'BF': 'Burkina Faso',
+ 'BI': 'Burundi',
+ 'KH': 'Cambodia',
+ 'CM': 'Cameroon',
+ 'CA': 'Canada',
+ 'CV': 'Cape Verde',
+ 'KY': 'Cayman Islands',
+ 'CF': 'Central African Republic',
+ 'TD': 'Chad',
+ 'CL': 'Chile',
+ 'CN': 'China',
+ 'CX': 'Christmas Island',
+ 'CC': 'Cocos (Keeling) Islands',
+ 'CO': 'Colombia',
+ 'KM': 'Comoros',
+ 'CG': 'Congo',
+ 'CD': 'Congo, the Democratic Republic of the',
+ 'CK': 'Cook Islands',
+ 'CR': 'Costa Rica',
+ 'CI': 'Côte d\'Ivoire',
+ 'HR': 'Croatia',
+ 'CU': 'Cuba',
+ 'CW': 'Curaçao',
+ 'CY': 'Cyprus',
+ 'CZ': 'Czech Republic',
+ 'DK': 'Denmark',
+ 'DJ': 'Djibouti',
+ 'DM': 'Dominica',
+ 'DO': 'Dominican Republic',
+ 'EC': 'Ecuador',
+ 'EG': 'Egypt',
+ 'SV': 'El Salvador',
+ 'GQ': 'Equatorial Guinea',
+ 'ER': 'Eritrea',
+ 'EE': 'Estonia',
+ 'ET': 'Ethiopia',
+ 'FK': 'Falkland Islands (Malvinas)',
+ 'FO': 'Faroe Islands',
+ 'FJ': 'Fiji',
+ 'FI': 'Finland',
+ 'FR': 'France',
+ 'GF': 'French Guiana',
+ 'PF': 'French Polynesia',
+ 'TF': 'French Southern Territories',
+ 'GA': 'Gabon',
+ 'GM': 'Gambia',
+ 'GE': 'Georgia',
+ 'DE': 'Germany',
+ 'GH': 'Ghana',
+ 'GI': 'Gibraltar',
+ 'GR': 'Greece',
+ 'GL': 'Greenland',
+ 'GD': 'Grenada',
+ 'GP': 'Guadeloupe',
+ 'GU': 'Guam',
+ 'GT': 'Guatemala',
+ 'GG': 'Guernsey',
+ 'GN': 'Guinea',
+ 'GW': 'Guinea-Bissau',
+ 'GY': 'Guyana',
+ 'HT': 'Haiti',
+ 'HM': 'Heard Island and McDonald Islands',
+ 'VA': 'Holy See (Vatican City State)',
+ 'HN': 'Honduras',
+ 'HK': 'Hong Kong',
+ 'HU': 'Hungary',
+ 'IS': 'Iceland',
+ 'IN': 'India',
+ 'ID': 'Indonesia',
+ 'IR': 'Iran, Islamic Republic of',
+ 'IQ': 'Iraq',
+ 'IE': 'Ireland',
+ 'IM': 'Isle of Man',
+ 'IL': 'Israel',
+ 'IT': 'Italy',
+ 'JM': 'Jamaica',
+ 'JP': 'Japan',
+ 'JE': 'Jersey',
+ 'JO': 'Jordan',
+ 'KZ': 'Kazakhstan',
+ 'KE': 'Kenya',
+ 'KI': 'Kiribati',
+ 'KP': 'Korea, Democratic People\'s Republic of',
+ 'KR': 'Korea, Republic of',
+ 'KW': 'Kuwait',
+ 'KG': 'Kyrgyzstan',
+ 'LA': 'Lao People\'s Democratic Republic',
+ 'LV': 'Latvia',
+ 'LB': 'Lebanon',
+ 'LS': 'Lesotho',
+ 'LR': 'Liberia',
+ 'LY': 'Libya',
+ 'LI': 'Liechtenstein',
+ 'LT': 'Lithuania',
+ 'LU': 'Luxembourg',
+ 'MO': 'Macao',
+ 'MK': 'Macedonia, the Former Yugoslav Republic of',
+ 'MG': 'Madagascar',
+ 'MW': 'Malawi',
+ 'MY': 'Malaysia',
+ 'MV': 'Maldives',
+ 'ML': 'Mali',
+ 'MT': 'Malta',
+ 'MH': 'Marshall Islands',
+ 'MQ': 'Martinique',
+ 'MR': 'Mauritania',
+ 'MU': 'Mauritius',
+ 'YT': 'Mayotte',
+ 'MX': 'Mexico',
+ 'FM': 'Micronesia, Federated States of',
+ 'MD': 'Moldova, Republic of',
+ 'MC': 'Monaco',
+ 'MN': 'Mongolia',
+ 'ME': 'Montenegro',
+ 'MS': 'Montserrat',
+ 'MA': 'Morocco',
+ 'MZ': 'Mozambique',
+ 'MM': 'Myanmar',
+ 'NA': 'Namibia',
+ 'NR': 'Nauru',
+ 'NP': 'Nepal',
+ 'NL': 'Netherlands',
+ 'NC': 'New Caledonia',
+ 'NZ': 'New Zealand',
+ 'NI': 'Nicaragua',
+ 'NE': 'Niger',
+ 'NG': 'Nigeria',
+ 'NU': 'Niue',
+ 'NF': 'Norfolk Island',
+ 'MP': 'Northern Mariana Islands',
+ 'NO': 'Norway',
+ 'OM': 'Oman',
+ 'PK': 'Pakistan',
+ 'PW': 'Palau',
+ 'PS': 'Palestine, State of',
+ 'PA': 'Panama',
+ 'PG': 'Papua New Guinea',
+ 'PY': 'Paraguay',
+ 'PE': 'Peru',
+ 'PH': 'Philippines',
+ 'PN': 'Pitcairn',
+ 'PL': 'Poland',
+ 'PT': 'Portugal',
+ 'PR': 'Puerto Rico',
+ 'QA': 'Qatar',
+ 'RE': 'Réunion',
+ 'RO': 'Romania',
+ 'RU': 'Russian Federation',
+ 'RW': 'Rwanda',
+ 'BL': 'Saint Barthélemy',
+ 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+ 'KN': 'Saint Kitts and Nevis',
+ 'LC': 'Saint Lucia',
+ 'MF': 'Saint Martin (French part)',
+ 'PM': 'Saint Pierre and Miquelon',
+ 'VC': 'Saint Vincent and the Grenadines',
+ 'WS': 'Samoa',
+ 'SM': 'San Marino',
+ 'ST': 'Sao Tome and Principe',
+ 'SA': 'Saudi Arabia',
+ 'SN': 'Senegal',
+ 'RS': 'Serbia',
+ 'SC': 'Seychelles',
+ 'SL': 'Sierra Leone',
+ 'SG': 'Singapore',
+ 'SX': 'Sint Maarten (Dutch part)',
+ 'SK': 'Slovakia',
+ 'SI': 'Slovenia',
+ 'SB': 'Solomon Islands',
+ 'SO': 'Somalia',
+ 'ZA': 'South Africa',
+ 'GS': 'South Georgia and the South Sandwich Islands',
+ 'SS': 'South Sudan',
+ 'ES': 'Spain',
+ 'LK': 'Sri Lanka',
+ 'SD': 'Sudan',
+ 'SR': 'Suriname',
+ 'SJ': 'Svalbard and Jan Mayen',
+ 'SZ': 'Swaziland',
+ 'SE': 'Sweden',
+ 'CH': 'Switzerland',
+ 'SY': 'Syrian Arab Republic',
+ 'TW': 'Taiwan, Province of China',
+ 'TJ': 'Tajikistan',
+ 'TZ': 'Tanzania, United Republic of',
+ 'TH': 'Thailand',
+ 'TL': 'Timor-Leste',
+ 'TG': 'Togo',
+ 'TK': 'Tokelau',
+ 'TO': 'Tonga',
+ 'TT': 'Trinidad and Tobago',
+ 'TN': 'Tunisia',
+ 'TR': 'Turkey',
+ 'TM': 'Turkmenistan',
+ 'TC': 'Turks and Caicos Islands',
+ 'TV': 'Tuvalu',
+ 'UG': 'Uganda',
+ 'UA': 'Ukraine',
+ 'AE': 'United Arab Emirates',
+ 'GB': 'United Kingdom',
+ 'US': 'United States',
+ 'UM': 'United States Minor Outlying Islands',
+ 'UY': 'Uruguay',
+ 'UZ': 'Uzbekistan',
+ 'VU': 'Vanuatu',
+ 'VE': 'Venezuela, Bolivarian Republic of',
+ 'VN': 'Viet Nam',
+ 'VG': 'Virgin Islands, British',
+ 'VI': 'Virgin Islands, U.S.',
+ 'WF': 'Wallis and Futuna',
+ 'EH': 'Western Sahara',
+ 'YE': 'Yemen',
+ 'ZM': 'Zambia',
+ 'ZW': 'Zimbabwe',
+ }
+
+ @classmethod
+ def short2full(cls, code):
+ """Convert an ISO 3166-2 country code to the corresponding full name"""
+ return cls._country_map.get(code.upper())
+
+
+class GeoUtils(object):
+ # Major IPv4 address blocks per country
+ _country_ip_map = {
+ 'AD': '85.94.160.0/19',
+ 'AE': '94.200.0.0/13',
+ 'AF': '149.54.0.0/17',
+ 'AG': '209.59.64.0/18',
+ 'AI': '204.14.248.0/21',
+ 'AL': '46.99.0.0/16',
+ 'AM': '46.70.0.0/15',
+ 'AO': '105.168.0.0/13',
+ 'AP': '159.117.192.0/21',
+ 'AR': '181.0.0.0/12',
+ 'AS': '202.70.112.0/20',
+ 'AT': '84.112.0.0/13',
+ 'AU': '1.128.0.0/11',
+ 'AW': '181.41.0.0/18',
+ 'AZ': '5.191.0.0/16',
+ 'BA': '31.176.128.0/17',
+ 'BB': '65.48.128.0/17',
+ 'BD': '114.130.0.0/16',
+ 'BE': '57.0.0.0/8',
+ 'BF': '129.45.128.0/17',
+ 'BG': '95.42.0.0/15',
+ 'BH': '37.131.0.0/17',
+ 'BI': '154.117.192.0/18',
+ 'BJ': '137.255.0.0/16',
+ 'BL': '192.131.134.0/24',
+ 'BM': '196.12.64.0/18',
+ 'BN': '156.31.0.0/16',
+ 'BO': '161.56.0.0/16',
+ 'BQ': '161.0.80.0/20',
+ 'BR': '152.240.0.0/12',
+ 'BS': '24.51.64.0/18',
+ 'BT': '119.2.96.0/19',
+ 'BW': '168.167.0.0/16',
+ 'BY': '178.120.0.0/13',
+ 'BZ': '179.42.192.0/18',
+ 'CA': '99.224.0.0/11',
+ 'CD': '41.243.0.0/16',
+ 'CF': '196.32.200.0/21',
+ 'CG': '197.214.128.0/17',
+ 'CH': '85.0.0.0/13',
+ 'CI': '154.232.0.0/14',
+ 'CK': '202.65.32.0/19',
+ 'CL': '152.172.0.0/14',
+ 'CM': '165.210.0.0/15',
+ 'CN': '36.128.0.0/10',
+ 'CO': '181.240.0.0/12',
+ 'CR': '201.192.0.0/12',
+ 'CU': '152.206.0.0/15',
+ 'CV': '165.90.96.0/19',
+ 'CW': '190.88.128.0/17',
+ 'CY': '46.198.0.0/15',
+ 'CZ': '88.100.0.0/14',
+ 'DE': '53.0.0.0/8',
+ 'DJ': '197.241.0.0/17',
+ 'DK': '87.48.0.0/12',
+ 'DM': '192.243.48.0/20',
+ 'DO': '152.166.0.0/15',
+ 'DZ': '41.96.0.0/12',
+ 'EC': '186.68.0.0/15',
+ 'EE': '90.190.0.0/15',
+ 'EG': '156.160.0.0/11',
+ 'ER': '196.200.96.0/20',
+ 'ES': '88.0.0.0/11',
+ 'ET': '196.188.0.0/14',
+ 'EU': '2.16.0.0/13',
+ 'FI': '91.152.0.0/13',
+ 'FJ': '144.120.0.0/16',
+ 'FM': '119.252.112.0/20',
+ 'FO': '88.85.32.0/19',
+ 'FR': '90.0.0.0/9',
+ 'GA': '41.158.0.0/15',
+ 'GB': '25.0.0.0/8',
+ 'GD': '74.122.88.0/21',
+ 'GE': '31.146.0.0/16',
+ 'GF': '161.22.64.0/18',
+ 'GG': '62.68.160.0/19',
+ 'GH': '45.208.0.0/14',
+ 'GI': '85.115.128.0/19',
+ 'GL': '88.83.0.0/19',
+ 'GM': '160.182.0.0/15',
+ 'GN': '197.149.192.0/18',
+ 'GP': '104.250.0.0/19',
+ 'GQ': '105.235.224.0/20',
+ 'GR': '94.64.0.0/13',
+ 'GT': '168.234.0.0/16',
+ 'GU': '168.123.0.0/16',
+ 'GW': '197.214.80.0/20',
+ 'GY': '181.41.64.0/18',
+ 'HK': '113.252.0.0/14',
+ 'HN': '181.210.0.0/16',
+ 'HR': '93.136.0.0/13',
+ 'HT': '148.102.128.0/17',
+ 'HU': '84.0.0.0/14',
+ 'ID': '39.192.0.0/10',
+ 'IE': '87.32.0.0/12',
+ 'IL': '79.176.0.0/13',
+ 'IM': '5.62.80.0/20',
+ 'IN': '117.192.0.0/10',
+ 'IO': '203.83.48.0/21',
+ 'IQ': '37.236.0.0/14',
+ 'IR': '2.176.0.0/12',
+ 'IS': '82.221.0.0/16',
+ 'IT': '79.0.0.0/10',
+ 'JE': '87.244.64.0/18',
+ 'JM': '72.27.0.0/17',
+ 'JO': '176.29.0.0/16',
+ 'JP': '126.0.0.0/8',
+ 'KE': '105.48.0.0/12',
+ 'KG': '158.181.128.0/17',
+ 'KH': '36.37.128.0/17',
+ 'KI': '103.25.140.0/22',
+ 'KM': '197.255.224.0/20',
+ 'KN': '198.32.32.0/19',
+ 'KP': '175.45.176.0/22',
+ 'KR': '175.192.0.0/10',
+ 'KW': '37.36.0.0/14',
+ 'KY': '64.96.0.0/15',
+ 'KZ': '2.72.0.0/13',
+ 'LA': '115.84.64.0/18',
+ 'LB': '178.135.0.0/16',
+ 'LC': '192.147.231.0/24',
+ 'LI': '82.117.0.0/19',
+ 'LK': '112.134.0.0/15',
+ 'LR': '41.86.0.0/19',
+ 'LS': '129.232.0.0/17',
+ 'LT': '78.56.0.0/13',
+ 'LU': '188.42.0.0/16',
+ 'LV': '46.109.0.0/16',
+ 'LY': '41.252.0.0/14',
+ 'MA': '105.128.0.0/11',
+ 'MC': '88.209.64.0/18',
+ 'MD': '37.246.0.0/16',
+ 'ME': '178.175.0.0/17',
+ 'MF': '74.112.232.0/21',
+ 'MG': '154.126.0.0/17',
+ 'MH': '117.103.88.0/21',
+ 'MK': '77.28.0.0/15',
+ 'ML': '154.118.128.0/18',
+ 'MM': '37.111.0.0/17',
+ 'MN': '49.0.128.0/17',
+ 'MO': '60.246.0.0/16',
+ 'MP': '202.88.64.0/20',
+ 'MQ': '109.203.224.0/19',
+ 'MR': '41.188.64.0/18',
+ 'MS': '208.90.112.0/22',
+ 'MT': '46.11.0.0/16',
+ 'MU': '105.16.0.0/12',
+ 'MV': '27.114.128.0/18',
+ 'MW': '105.234.0.0/16',
+ 'MX': '187.192.0.0/11',
+ 'MY': '175.136.0.0/13',
+ 'MZ': '197.218.0.0/15',
+ 'NA': '41.182.0.0/16',
+ 'NC': '101.101.0.0/18',
+ 'NE': '197.214.0.0/18',
+ 'NF': '203.17.240.0/22',
+ 'NG': '105.112.0.0/12',
+ 'NI': '186.76.0.0/15',
+ 'NL': '145.96.0.0/11',
+ 'NO': '84.208.0.0/13',
+ 'NP': '36.252.0.0/15',
+ 'NR': '203.98.224.0/19',
+ 'NU': '49.156.48.0/22',
+ 'NZ': '49.224.0.0/14',
+ 'OM': '5.36.0.0/15',
+ 'PA': '186.72.0.0/15',
+ 'PE': '186.160.0.0/14',
+ 'PF': '123.50.64.0/18',
+ 'PG': '124.240.192.0/19',
+ 'PH': '49.144.0.0/13',
+ 'PK': '39.32.0.0/11',
+ 'PL': '83.0.0.0/11',
+ 'PM': '70.36.0.0/20',
+ 'PR': '66.50.0.0/16',
+ 'PS': '188.161.0.0/16',
+ 'PT': '85.240.0.0/13',
+ 'PW': '202.124.224.0/20',
+ 'PY': '181.120.0.0/14',
+ 'QA': '37.210.0.0/15',
+ 'RE': '139.26.0.0/16',
+ 'RO': '79.112.0.0/13',
+ 'RS': '178.220.0.0/14',
+ 'RU': '5.136.0.0/13',
+ 'RW': '105.178.0.0/15',
+ 'SA': '188.48.0.0/13',
+ 'SB': '202.1.160.0/19',
+ 'SC': '154.192.0.0/11',
+ 'SD': '154.96.0.0/13',
+ 'SE': '78.64.0.0/12',
+ 'SG': '152.56.0.0/14',
+ 'SI': '188.196.0.0/14',
+ 'SK': '78.98.0.0/15',
+ 'SL': '197.215.0.0/17',
+ 'SM': '89.186.32.0/19',
+ 'SN': '41.82.0.0/15',
+ 'SO': '197.220.64.0/19',
+ 'SR': '186.179.128.0/17',
+ 'SS': '105.235.208.0/21',
+ 'ST': '197.159.160.0/19',
+ 'SV': '168.243.0.0/16',
+ 'SX': '190.102.0.0/20',
+ 'SY': '5.0.0.0/16',
+ 'SZ': '41.84.224.0/19',
+ 'TC': '65.255.48.0/20',
+ 'TD': '154.68.128.0/19',
+ 'TG': '196.168.0.0/14',
+ 'TH': '171.96.0.0/13',
+ 'TJ': '85.9.128.0/18',
+ 'TK': '27.96.24.0/21',
+ 'TL': '180.189.160.0/20',
+ 'TM': '95.85.96.0/19',
+ 'TN': '197.0.0.0/11',
+ 'TO': '175.176.144.0/21',
+ 'TR': '78.160.0.0/11',
+ 'TT': '186.44.0.0/15',
+ 'TV': '202.2.96.0/19',
+ 'TW': '120.96.0.0/11',
+ 'TZ': '156.156.0.0/14',
+ 'UA': '93.72.0.0/13',
+ 'UG': '154.224.0.0/13',
+ 'US': '3.0.0.0/8',
+ 'UY': '167.56.0.0/13',
+ 'UZ': '82.215.64.0/18',
+ 'VA': '212.77.0.0/19',
+ 'VC': '24.92.144.0/20',
+ 'VE': '186.88.0.0/13',
+ 'VG': '172.103.64.0/18',
+ 'VI': '146.226.0.0/16',
+ 'VN': '14.160.0.0/11',
+ 'VU': '202.80.32.0/20',
+ 'WF': '117.20.32.0/21',
+ 'WS': '202.4.32.0/19',
+ 'YE': '134.35.0.0/16',
+ 'YT': '41.242.116.0/22',
+ 'ZA': '41.0.0.0/11',
+ 'ZM': '165.56.0.0/13',
+ 'ZW': '41.85.192.0/19',
+ }
+
+ @classmethod
+ def random_ipv4(cls, code_or_block):
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
+ addr, preflen = block.split('/')
+ addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_max = addr_min | (0xffffffff >> int(preflen))
+ return compat_str(socket.inet_ntoa(
+ compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+
+
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+ def __init__(self, proxies=None):
+ # Set default handlers
+ for type in ('http', 'https'):
+ setattr(self, '%s_open' % type,
+ lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
+ return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+ def proxy_open(self, req, proxy, type):
+ req_proxy = req.headers.get('Ytdl-request-proxy')
+ if req_proxy is not None:
+ proxy = req_proxy
+ del req.headers['Ytdl-request-proxy']
+
+ if proxy == '__noproxy__':
+ return None # No Proxy
+ if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ req.add_header('Ytdl-socks-proxy', proxy)
+ # youtube-dl's http/https handlers do wrapping the socket with socks
+ return None
+ return compat_urllib_request.ProxyHandler.proxy_open(
+ self, req, proxy, type)
+
+
+# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+# released into Public Domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
+def long_to_bytes(n, blocksize=0):
+ """long_to_bytes(n:long, blocksize:int) : string
+ Convert a long integer to a byte string.
+
+ If optional blocksize is given and greater than zero, pad the front of the
+ byte string with binary zeros so that the length is a multiple of
+ blocksize.
+ """
+ # after much testing, this algorithm was deemed to be the fastest
+ s = b''
+ n = int(n)
+ while n > 0:
+ s = compat_struct_pack('>I', n & 0xffffffff) + s
+ n = n >> 32
+ # strip off leading zeros
+ for i in range(len(s)):
+ if s[i] != b'\000'[0]:
+ break
+ else:
+ # only happens when n == 0
+ s = b'\000'
+ i = 0
+ s = s[i:]
+ # add back some pad bytes. this could be done more efficiently w.r.t. the
+ # de-padding being done above, but sigh...
+ if blocksize > 0 and len(s) % blocksize:
+ s = (blocksize - len(s) % blocksize) * b'\000' + s
+ return s
+
+
+def bytes_to_long(s):
+ """bytes_to_long(string) : long
+ Convert a byte string to a long integer.
+
+ This is (essentially) the inverse of long_to_bytes().
+ """
+ acc = 0
+ length = len(s)
+ if length % 4:
+ extra = (4 - length % 4)
+ s = b'\000' * extra + s
+ length = length + extra
+ for i in range(0, length, 4):
+ acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ return acc
+
+
+def ohdave_rsa_encrypt(data, exponent, modulus):
+ '''
+ Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
+
+ Input:
+ data: data to encrypt, bytes-like object
+ exponent, modulus: parameter e and N of RSA algorithm, both integer
+ Output: hex string of encrypted data
+
+ Limitation: supports one block encryption only
+ '''
+
+ payload = int(binascii.hexlify(data[::-1]), 16)
+ encrypted = pow(payload, exponent, modulus)
+ return '%x' % encrypted
+
+
+def pkcs1pad(data, length):
+ """
+ Padding input data with PKCS#1 scheme
+
+ @param {int[]} data input data
+ @param {int} length target length
+ @returns {int[]} padded data
+ """
+ if len(data) > length - 11:
+ raise ValueError('Input data too long for PKCS#1 padding')
+
+ pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
+ return [0, 2] + pseudo_random + [0] + data
+
+
+def encode_base_n(num, n, table=None):
+ FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ if not table:
+ table = FULL_TABLE[:n]
+
+ if n > len(table):
+ raise ValueError('base %d exceeds table length %d' % (n, len(table)))
+
+ if num == 0:
+ return table[0]
+
+ ret = ''
+ while num:
+ ret = table[num % n] + ret
+ num = num // n
+ return ret
+
+
+def decode_packed_codes(code):
+ mobj = re.search(PACKED_CODES_RE, code)
+ obfucasted_code, base, count, symbols = mobj.groups()
+ base = int(base)
+ count = int(count)
+ symbols = symbols.split('|')
+ symbol_table = {}
+
+ while count:
+ count -= 1
+ base_n_count = encode_base_n(count, base)
+ symbol_table[base_n_count] = symbols[count] or base_n_count
+
+ return re.sub(
+ r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+ obfucasted_code)
+
+
+def parse_m3u8_attributes(attrib):
+ info = {}
+ for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+ if val.startswith('"'):
+ val = val[1:-1]
+ info[key] = val
+ return info
+
+
+def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
+def decode_png(png_data):
+ # Reference: https://www.w3.org/TR/PNG/
+ header = png_data[8:]
+
+ if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
+ raise IOError('Not a valid PNG file.')
+
+ int_map = {1: '>B', 2: '>H', 4: '>I'}
+ unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+
+ chunks = []
+
+ while header:
+ length = unpack_integer(header[:4])
+ header = header[4:]
+
+ chunk_type = header[:4]
+ header = header[4:]
+
+ chunk_data = header[:length]
+ header = header[length:]
+
+ header = header[4:] # Skip CRC
+
+ chunks.append({
+ 'type': chunk_type,
+ 'length': length,
+ 'data': chunk_data
+ })
+
+ ihdr = chunks[0]['data']
+
+ width = unpack_integer(ihdr[:4])
+ height = unpack_integer(ihdr[4:8])
+
+ idat = b''
+
+ for chunk in chunks:
+ if chunk['type'] == b'IDAT':
+ idat += chunk['data']
+
+ if not idat:
+ raise IOError('Unable to read PNG data.')
+
+ decompressed_data = bytearray(zlib.decompress(idat))
+
+ stride = width * 3
+ pixels = []
+
+ def _get_pixel(idx):
+ x = idx % stride
+ y = idx // stride
+ return pixels[y][x]
+
+ for y in range(height):
+ basePos = y * (1 + stride)
+ filter_type = decompressed_data[basePos]
+
+ current_row = []
+
+ pixels.append(current_row)
+
+ for x in range(stride):
+ color = decompressed_data[1 + basePos + x]
+ basex = y * stride + x
+ left = 0
+ up = 0
+
+ if x > 2:
+ left = _get_pixel(basex - 3)
+ if y > 0:
+ up = _get_pixel(basex - stride)
+
+ if filter_type == 1: # Sub
+ color = (color + left) & 0xff
+ elif filter_type == 2: # Up
+ color = (color + up) & 0xff
+ elif filter_type == 3: # Average
+ color = (color + ((left + up) >> 1)) & 0xff
+ elif filter_type == 4: # Paeth
+ a = left
+ b = up
+ c = 0
+
+ if x > 2 and y > 0:
+ c = _get_pixel(basex - stride - 3)
+
+ p = a + b - c
+
+ pa = abs(p - a)
+ pb = abs(p - b)
+ pc = abs(p - c)
+
+ if pa <= pb and pa <= pc:
+ color = (color + a) & 0xff
+ elif pb <= pc:
+ color = (color + b) & 0xff
+ else:
+ color = (color + c) & 0xff
+
+ current_row.append(color)
+
+ return width, height, pixels
+
+
+def write_xattr(path, key, value):
+ # This mess below finds the best xattr tool for the job
+ try:
+ # try the pyxattr module...
+ import xattr
+
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
+
+ try:
+ setxattr(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+
+ except ImportError:
+ if compat_os_name == 'nt':
+ # Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ ads_fn = path + ':' + key
+ try:
+ with open(ads_fn, 'wb') as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ else:
+ user_has_setfattr = check_executable('setfattr', ['--version'])
+ user_has_xattr = check_executable('xattr', ['-h'])
+
+ if user_has_setfattr or user_has_xattr:
+
+ value = value.decode('utf-8')
+ if user_has_setfattr:
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
+ elif user_has_xattr:
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)] +
+ [encodeArgument(o) for o in opts] +
+ [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
+
+ else:
+ # On Unix, and can't find pyxattr, setfattr, or xattr.
+ if sys.platform.startswith('linux'):
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'pyxattr' or 'xattr' "
+ "modules, or the GNU 'attr' package "
+ "(which contains the 'setfattr' tool).")
+ else:
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'xattr' module, "
+ "or the 'xattr' binary.")
+
+
+def random_birthday(year_field, month_field, day_field):
+ return {
+ year_field: str(random.randint(1950, 1995)),
+ month_field: str(random.randint(1, 12)),
+ day_field: str(random.randint(1, 31)),
+ }
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
new file mode 100644
index 0000000..c7083cf
--- /dev/null
+++ b/youtube_dl/version.py
@@ -0,0 +1,3 @@
+from __future__ import unicode_literals
+
+__version__ = '2018.07.10'