aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/YoutubeDL.py2392
-rw-r--r--youtube_dl/__init__.py481
-rw-r--r--youtube_dl/__main__.py19
-rw-r--r--youtube_dl/aes.py361
-rw-r--r--youtube_dl/cache.py96
-rw-r--r--youtube_dl/compat.py3016
-rw-r--r--youtube_dl/downloader/__init__.py61
-rw-r--r--youtube_dl/downloader/common.py389
-rw-r--r--youtube_dl/downloader/dash.py80
-rw-r--r--youtube_dl/downloader/external.py354
-rw-r--r--youtube_dl/downloader/f4m.py438
-rw-r--r--youtube_dl/downloader/fragment.py268
-rw-r--r--youtube_dl/downloader/hls.py204
-rw-r--r--youtube_dl/downloader/http.py354
-rw-r--r--youtube_dl/downloader/ism.py259
-rw-r--r--youtube_dl/downloader/rtmp.py214
-rw-r--r--youtube_dl/downloader/rtsp.py47
-rw-r--r--youtube_dl/extractor/__init__.py46
-rw-r--r--youtube_dl/extractor/adobepass.py1567
-rw-r--r--youtube_dl/extractor/common.py2862
-rw-r--r--youtube_dl/extractor/commonmistakes.py50
-rw-r--r--youtube_dl/extractor/commonprotocols.py60
-rw-r--r--youtube_dl/extractor/extractors.py31
-rw-r--r--youtube_dl/extractor/generic.py3335
-rw-r--r--youtube_dl/extractor/openload.py379
-rw-r--r--youtube_dl/extractor/youtube.py3394
-rw-r--r--youtube_dl/extractor/youtube_unmodified_reference.py3325
-rw-r--r--youtube_dl/jsinterp.py262
-rw-r--r--youtube_dl/options.py916
-rw-r--r--youtube_dl/postprocessor/__init__.py40
-rw-r--r--youtube_dl/postprocessor/common.py69
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py93
-rw-r--r--youtube_dl/postprocessor/execafterdownload.py31
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py613
-rw-r--r--youtube_dl/postprocessor/metadatafromtitle.py48
-rw-r--r--youtube_dl/postprocessor/xattrpp.py79
-rw-r--r--youtube_dl/socks.py273
-rw-r--r--youtube_dl/swfinterp.py834
-rw-r--r--youtube_dl/update.py187
-rw-r--r--youtube_dl/utils.py3990
-rw-r--r--youtube_dl/version.py3
41 files changed, 0 insertions, 31520 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
deleted file mode 100644
index 38ba43a..0000000
--- a/youtube_dl/YoutubeDL.py
+++ /dev/null
@@ -1,2392 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
-import collections
-import contextlib
-import copy
-import datetime
-import errno
-import fileinput
-import io
-import itertools
-import json
-import locale
-import operator
-import os
-import platform
-import re
-import shutil
-import subprocess
-import socket
-import sys
-import time
-import tokenize
-import traceback
-import random
-
-from string import ascii_letters
-
-from .compat import (
- compat_basestring,
- compat_cookiejar,
- compat_get_terminal_size,
- compat_http_client,
- compat_kwargs,
- compat_numeric_types,
- compat_os_name,
- compat_str,
- compat_tokenize_tokenize,
- compat_urllib_error,
- compat_urllib_request,
- compat_urllib_request_DataHandler,
-)
-from .utils import (
- age_restricted,
- args_to_str,
- ContentTooShortError,
- date_from_str,
- DateRange,
- DEFAULT_OUTTMPL,
- determine_ext,
- determine_protocol,
- DownloadError,
- encode_compat_str,
- encodeFilename,
- error_to_compat_str,
- expand_path,
- ExtractorError,
- format_bytes,
- formatSeconds,
- GeoRestrictedError,
- int_or_none,
- ISO3166Utils,
- locked_file,
- make_HTTPS_handler,
- MaxDownloadsReached,
- orderedSet,
- PagedList,
- parse_filesize,
- PerRequestProxyHandler,
- platform_name,
- PostProcessingError,
- preferredencoding,
- prepend_extension,
- register_socks_protocols,
- render_table,
- replace_extension,
- SameFileError,
- sanitize_filename,
- sanitize_path,
- sanitize_url,
- sanitized_Request,
- std_headers,
- subtitles_filename,
- UnavailableVideoError,
- url_basename,
- version_tuple,
- write_json_file,
- write_string,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
-)
-from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
-from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegFixupStretchedPP,
- FFmpegMergerPP,
- FFmpegPostProcessor,
- get_postprocessor,
-)
-from .version import __version__
-
-if compat_os_name == 'nt':
- import ctypes
-
-
-class YoutubeDL(object):
- """YoutubeDL class.
-
- YoutubeDL objects are the ones responsible of downloading the
- actual video file and writing it to disk if the user has requested
- it, among some other tasks. In most cases there should be one per
- program. As, given a video URL, the downloader doesn't know how to
- extract all the needed information, task that InfoExtractors do, it
- has to pass the URL to one of them.
-
- For this, YoutubeDL objects have a method that allows
- InfoExtractors to be registered in a given order. When it is passed
- a URL, the YoutubeDL object handles it to the first InfoExtractor it
- finds that reports being able to handle it. The InfoExtractor extracts
- all the information about the video or videos the URL refers to, and
- YoutubeDL process the extracted information, possibly using a File
- Downloader to download the video.
-
- YoutubeDL objects accept a lot of parameters. In order not to saturate
- the object constructor with arguments, it receives a dictionary of
- options instead. These options are available through the params
- attribute for the InfoExtractors to use. The YoutubeDL also
- registers itself as the downloader in charge for the InfoExtractors
- that are added to it, so this is a "mutual registration".
-
- Available options:
-
- username: Username for authentication purposes.
- password: Password for authentication purposes.
- videopassword: Password for accessing a video.
- ap_mso: Adobe Pass multiple-system operator identifier.
- ap_username: Multiple-system operator account username.
- ap_password: Multiple-system operator account password.
- usenetrc: Use netrc for authentication instead.
- verbose: Print additional info to stdout.
- quiet: Do not print messages to stdout.
- no_warnings: Do not print out anything for warnings.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forceid: Force printing ID.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- forceduration: Force printing duration.
- forcejson: Force printing info_dict as JSON.
- dump_single_json: Force printing the info_dict of the whole playlist
- (or video) as a single JSON line.
- simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
- outtmpl: Template for output names.
- restrictfilenames: Do not allow "&" and spaces in file names
- ignoreerrors: Do not stop on download errors.
- force_generic_extractor: Force downloader to use the generic extractor
- nooverwrites: Prevent overwriting files.
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
- playlist_items: Specific indices of playlist to download.
- playlistreverse: Download playlist items in reverse order.
- playlistrandom: Download playlist items in random order.
- matchtitle: Download only matching titles.
- rejecttitle: Reject downloads for matching titles.
- logger: Log messages to a logging.Logger instance.
- logtostderr: Log messages to stderr instead of stdout.
- writedescription: Write the video description to a .description file
- writeinfojson: Write the video description to a .info.json file
- writeannotations: Write the video annotations to a .annotations.xml file
- writethumbnail: Write the thumbnail image to a file
- write_all_thumbnails: Write all thumbnail formats to files
- writesubtitles: Write the video subtitles to a file
- writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
- listsubtitles: Lists all available subtitles for the video
- subtitlesformat: The format code for subtitles
- subtitleslangs: List of languages of the subtitles to download
- keepvideo: Keep the video file after post-processing
- daterange: A DateRange object, download only if the upload_date is in the range.
- skip_download: Skip the actual download of the video file
- cachedir: Location of the cache files in the filesystem.
- False to disable filesystem cache.
- noplaylist: Download single video instead of a playlist if in doubt.
- age_limit: An integer representing the user's age in years.
- Unsuitable videos for the given age are skipped.
- min_views: An integer representing the minimum view count the video
- must have in order to not be skipped.
- Videos without view count information are always
- downloaded. None for no limit.
- max_views: An integer representing the maximum view count.
- Videos that are more popular than that are not
- downloaded.
- Videos without view count information are always
- downloaded. None for no limit.
- download_archive: File name of a file where all downloads are recorded.
- Videos already present in the file are not downloaded
- again.
- cookiefile: File name where cookies should be read from and dumped to.
- nocheckcertificate:Do not verify SSL certificates
- prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
- At the moment, this is only supported by YouTube.
- proxy: URL of the proxy server to use
- geo_verification_proxy: URL of the proxy to use for IP address verification
- on geo-restricted sites.
- socket_timeout: Time to wait for unresponsive hosts, in seconds
- bidi_workaround: Work around buggy terminals without bidirectional text
- support, using fridibi
- debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well
- default_search: Prepend this string if an input url is not valid.
- 'auto' for elaborate guessing
- encoding: Use this encoding instead of the system-specified.
- extract_flat: Do not resolve URLs, return the immediate result.
- Pass in 'in_playlist' to only show this behavior for
- playlist items.
- postprocessors: A list of dictionaries, each with an entry
- * key: The name of the postprocessor. See
- youtube_dl/postprocessor/__init__.py for a list.
- as well as any further keyword arguments for the
- postprocessor.
- progress_hooks: A list of functions that get called on download
- progress, with a dictionary with the entries
- * status: One of "downloading", "error", or "finished".
- Check this first and ignore unknown values.
-
- If status is one of "downloading", or "finished", the
- following properties may also be present:
- * filename: The final filename (always present)
- * tmpfilename: The filename we're currently writing to
- * downloaded_bytes: Bytes on disk
- * total_bytes: Size of the whole file, None if unknown
- * total_bytes_estimate: Guess of the eventual file size,
- None if unavailable.
- * elapsed: The number of seconds since download started.
- * eta: The estimated time in seconds, None if unknown
- * speed: The download speed in bytes/second, None if
- unknown
- * fragment_index: The counter of the currently
- downloaded video fragment.
- * fragment_count: The number of fragments (= individual
- files that will be merged)
-
- Progress hooks are guaranteed to be called at least once
- (with status "finished") if the download is successful.
- merge_output_format: Extension to use when merging formats.
- fixup: Automatically correct known faults of the file.
- One of:
- - "never": do nothing
- - "warn": only emit a warning
- - "detect_or_warn": check whether we can do anything
- about it, warn otherwise (default)
- source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- youtube-dl servers for debugging.
- sleep_interval: Number of seconds to sleep before each download when
- used alone or a lower bound of a range for randomized
- sleep before each download (minimum possible number
- of seconds to sleep) when used along with
- max_sleep_interval.
- max_sleep_interval:Upper bound of a range for randomized sleep before each
- download (maximum possible number of seconds to sleep).
- Must only be used along with sleep_interval.
- Actual sleep time will be a random float from range
- [sleep_interval; max_sleep_interval].
- listformats: Print an overview of available video formats and exit.
- list_thumbnails: Print a table of all thumbnails and exit.
- match_filter: A function that gets called with the info_dict of
- every video.
- If it returns a message, the video is ignored.
- If it returns None, the video is downloaded.
- match_filter_func in utils.py is one example for this.
- no_color: Do not emit color codes in output.
- geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
- HTTP header
- geo_bypass_country:
- Two-letter ISO 3166-2 country code that will be used for
- explicit geographic restriction bypassing via faking
- X-Forwarded-For HTTP header
- geo_bypass_ip_block:
- IP range in CIDR notation that will be used similarly to
- geo_bypass_country
-
- The following options determine which downloader is picked:
- external_downloader: Executable of the external downloader to call.
- None or unset for standard (built-in) downloader.
- hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
-
- The following parameters are not used by YoutubeDL itself, they are used by
- the downloader (see youtube_dl/downloader/common.py):
- nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize, external_downloader_args, hls_use_mpegts,
- http_chunk_size.
-
- The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
- postprocessor_args: A list of additional command-line arguments for the
- postprocessor.
-
- The following options are used by the Youtube extractor:
- youtube_include_dash_manifest: If True (default), DASH manifests and related
- data will be downloaded and processed by extractor.
- You can reduce network I/O by disabling it if you don't
- care about DASH.
- """
-
- _NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
- params = None
- _ies = []
- _pps = []
- _download_retcode = None
- _num_downloads = None
- _screen_file = None
-
- def __init__(self, params=None, auto_init=True):
- """Create a FileDownloader object with the given options."""
- if params is None:
- params = {}
- self._ies = []
- self._ies_instances = {}
- self._pps = []
- self._progress_hooks = []
- self._download_retcode = 0
- self._num_downloads = 0
- self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
- self._err_file = sys.stderr
- self.params = {
- # Default parameters
- 'nocheckcertificate': False,
- }
- self.params.update(params)
- self.cache = Cache(self)
-
- def check_deprecated(param, option, suggestion):
- if self.params.get(param) is not None:
- self.report_warning(
- '%s is deprecated. Use %s instead.' % (option, suggestion))
- return True
- return False
-
- if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
- if self.params.get('geo_verification_proxy') is None:
- self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
-
- check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
- check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
- check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
-
- if params.get('bidi_workaround', False):
- try:
- import pty
- master, slave = pty.openpty()
- width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._err_file)
- try:
- self._output_process = subprocess.Popen(
- ['bidiv'] + width_args, **sp_kwargs
- )
- except OSError:
- self._output_process = subprocess.Popen(
- ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
- self._output_channel = os.fdopen(master, 'rb')
- except OSError as ose:
- if ose.errno == errno.ENOENT:
- self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
- else:
- raise
-
- if (sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
- not params.get('restrictfilenames', False)):
- # Unicode filesystem API will throw errors (#1474, #13027)
- self.report_warning(
- 'Assuming --restrict-filenames since file system encoding '
- 'cannot encode all characters. '
- 'Set the LC_ALL environment variable to fix this.')
- self.params['restrictfilenames'] = True
-
- if isinstance(params.get('outtmpl'), bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
-
- self._setup_opener()
-
- if auto_init:
- self.print_debug_header()
- self.add_default_info_extractors()
-
- for pp_def_raw in self.params.get('postprocessors', []):
- pp_class = get_postprocessor(pp_def_raw['key'])
- pp_def = dict(pp_def_raw)
- del pp_def['key']
- pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
-
- for ph in self.params.get('progress_hooks', []):
- self.add_progress_hook(ph)
-
- register_socks_protocols()
-
- def warn_if_short_id(self, argv):
- # short YouTube ID starting with dash?
- idxs = [
- i for i, a in enumerate(argv)
- if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
- if idxs:
- correct_argv = (
- ['youtube-dl'] +
- [a for i, a in enumerate(argv) if i not in idxs] +
- ['--'] + [argv[i] for i in idxs]
- )
- self.report_warning(
- 'Long argument string detected. '
- 'Use -- to separate parameters and URLs, like this:\n%s\n' %
- args_to_str(correct_argv))
-
- def add_info_extractor(self, ie):
- """Add an InfoExtractor object to the end of the list."""
- self._ies.append(ie)
- if not isinstance(ie, type):
- self._ies_instances[ie.ie_key()] = ie
- ie.set_downloader(self)
-
- def get_info_extractor(self, ie_key):
- """
- Get an instance of an IE with name ie_key, it will try to get one from
- the _ies list, if there's no instance it will create a new one and add
- it to the extractor list.
- """
- ie = self._ies_instances.get(ie_key)
- if ie is None:
- ie = get_info_extractor(ie_key)()
- self.add_info_extractor(ie)
- return ie
-
- def add_default_info_extractors(self):
- """
- Add the InfoExtractors returned by gen_extractors to the end of the list
- """
- for ie in gen_extractor_classes():
- self.add_info_extractor(ie)
-
- def add_post_processor(self, pp):
- """Add a PostProcessor object to the end of the chain."""
- self._pps.append(pp)
- pp.set_downloader(self)
-
- def add_progress_hook(self, ph):
- """Add the progress hook (currently only for the file downloader)"""
- self._progress_hooks.append(ph)
-
- def _bidi_workaround(self, message):
- if not hasattr(self, '_output_channel'):
- return message
-
- assert hasattr(self, '_output_process')
- assert isinstance(message, compat_str)
- line_count = message.count('\n') + 1
- self._output_process.stdin.write((message + '\n').encode('utf-8'))
- self._output_process.stdin.flush()
- res = ''.join(self._output_channel.readline().decode('utf-8')
- for _ in range(line_count))
- return res[:-len('\n')]
-
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode."""
- return self.to_stdout(message, skip_eol, check_quiet=True)
-
- def _write_string(self, s, out=None):
- write_string(s, out=out, encoding=self.params.get('encoding'))
-
- def to_stdout(self, message, skip_eol=False, check_quiet=False):
- """Print message to stdout if not in quiet mode."""
- if self.params.get('logger'):
- self.params['logger'].debug(message)
- elif not check_quiet or not self.params.get('quiet', False):
- message = self._bidi_workaround(message)
- terminator = ['\n', ''][skip_eol]
- output = message + terminator
-
- self._write_string(output, self._screen_file)
-
- def to_stderr(self, message):
- """Print message to stderr."""
- assert isinstance(message, compat_str)
- if self.params.get('logger'):
- self.params['logger'].error(message)
- else:
- message = self._bidi_workaround(message)
- output = message + '\n'
- self._write_string(output, self._err_file)
-
- def to_console_title(self, message):
- if not self.params.get('consoletitle', False):
- return
- if compat_os_name == 'nt':
- if ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self._write_string('\033]0;%s\007' % message, self._screen_file)
-
- def save_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate', False):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Save the title on stack
- self._write_string('\033[22;0t', self._screen_file)
-
- def restore_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate', False):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Restore the title from stack
- self._write_string('\033[23;0t', self._screen_file)
-
- def __enter__(self):
- self.save_console_title()
- return self
-
- def __exit__(self, *args):
- self.restore_console_title()
-
- if self.params.get('cookiefile') is not None:
- self.cookiejar.save()
-
- def trouble(self, message=None, tb=None):
- """Determine action to take when a download problem appears.
-
- Depending on if the downloader has been configured to ignore
- download errors or not, this method may throw an exception or
- not when errors are found, after printing the message.
-
- tb, if given, is additional traceback information.
- """
- if message is not None:
- self.to_stderr(message)
- if self.params.get('verbose'):
- if tb is None:
- if sys.exc_info()[0]: # if .trouble has been called from an except block
- tb = ''
- if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
- tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
- tb += encode_compat_str(traceback.format_exc())
- else:
- tb_data = traceback.format_list(traceback.extract_stack())
- tb = ''.join(tb_data)
- self.to_stderr(tb)
- if not self.params.get('ignoreerrors', False):
- if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
- exc_info = sys.exc_info()[1].exc_info
- else:
- exc_info = sys.exc_info()
- raise DownloadError(message, exc_info)
- self._download_retcode = 1
-
- def report_warning(self, message):
- '''
- Print the message to stderr, it will be prefixed with 'WARNING:'
- If stderr is a tty file the 'WARNING:' will be colored
- '''
- if self.params.get('logger') is not None:
- self.params['logger'].warning(message)
- else:
- if self.params.get('no_warnings'):
- return
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;33mWARNING:\033[0m'
- else:
- _msg_header = 'WARNING:'
- warning_message = '%s %s' % (_msg_header, message)
- self.to_stderr(warning_message)
-
- def report_error(self, message, tb=None):
- '''
- Do the same as trouble, but prefixes the message with 'ERROR:', colored
- in red if stderr is a tty file.
- '''
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;31mERROR:\033[0m'
- else:
- _msg_header = 'ERROR:'
- error_message = '%s %s' % (_msg_header, message)
- self.trouble(error_message, tb)
-
- def report_file_already_downloaded(self, file_name):
- """Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
-
- def prepare_filename(self, info_dict):
- """Generate the output filename."""
- try:
- template_dict = dict(info_dict)
-
- template_dict['epoch'] = int(time.time())
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
-
- sanitize = lambda k, v: sanitize_filename(
- compat_str(v),
- restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id' or k.endswith('_id')))
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
-
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-
- # For fields playlist_index and autonumber convert all occurrences
- # of %(field)s to %(field)0Nd for backward compatibility
- field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
- }
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
- for numeric_field in self._NUMERIC_FIELDS:
- if numeric_field not in template_dict:
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- [diouxXeEfFgGcrs%] # conversion type
- '''
- outtmpl = re.sub(
- FORMAT_RE.format(numeric_field),
- r'%({0})s'.format(numeric_field), outtmpl)
-
- # expand_path translates '%%' into '%' and '$$' into '$'
- # correspondingly that is not what we want since we need to keep
- # '%%' intact for template dict substitution step. Working around
- # with boundary-alike separator hack.
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
-
- # outtmpl should be expand_path'ed before template dict substitution
- # because meta fields may contain env variables we don't want to
- # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
- # title "Hello $PATH", we don't want `$PATH` to be expanded.
- filename = expand_path(outtmpl).replace(sep, '') % template_dict
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
- except ValueError as err:
- self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
- return None
-
- def _match_entry(self, info_dict, incomplete):
- """ Returns None iff the file should be downloaded """
-
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- if 'title' in info_dict:
- # This can happen when we're just evaluating the playlist
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return '"' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
- date = info_dict.get('upload_date')
- if date is not None:
- dateRange = self.params.get('daterange', DateRange())
- if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
- view_count = info_dict.get('view_count')
- if view_count is not None:
- min_views = self.params.get('min_views')
- if min_views is not None and view_count < min_views:
- return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
- max_views = self.params.get('max_views')
- if max_views is not None and view_count > max_views:
- return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
-
- if not incomplete:
- match_filter = self.params.get('match_filter')
- if match_filter is not None:
- ret = match_filter(info_dict)
- if ret is not None:
- return ret
-
- return None
-
- @staticmethod
- def add_extra_info(info_dict, extra_info):
- '''Set the keys from extra_info in info dict if they are missing'''
- for key, value in extra_info.items():
- info_dict.setdefault(key, value)
-
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract information for url with the first suitable extractor.

    url -- the URL to extract.
    download -- forwarded to process_ie_result; if true the videos are
                also downloaded.
    ie_key -- when given, only the extractor with this key is tried.
    extra_info -- dict of extra values to add to each result
                  (defaults to an empty dict).
    process -- when false, the raw extractor result is returned without
               being passed to process_ie_result.
    force_generic_extractor -- use the 'Generic' extractor unless ie_key
                               is given.

    Returns the value of process_ie_result (or the raw extractor result
    when process is false). Returns None when the extractor reported that
    it already finished, when a handled error was reported, or when no
    extractor is suitable for url.
    '''
    # A fresh dict per call: a literal {} default would be one object
    # shared by every invocation.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie = self.get_info_extractor(ie.ie_key())
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
            break
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ran out of extractors without a break or return,
        # i.e. none of them declared itself suitable for this URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
-
def add_default_extra_info(self, ie_result, ie, url):
    """Add the extractor name/key and page URL fields to ie_result.

    The values are handed to add_extra_info, so fields that ie_result
    already carries are not overwritten.
    """
    defaults = {}
    defaults['extractor'] = ie.IE_NAME
    defaults['webpage_url'] = url
    defaults['webpage_url_basename'] = url_basename(url)
    defaults['extractor_key'] = ie.ie_key()
    self.add_extra_info(ie_result, defaults)
-
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result -- dict returned by an extractor; its '_type' (default
                 'video') selects the handling:
                 'video' is passed to process_video_result,
                 'url' is extracted again through extract_info,
                 'url_transparent' is extracted again and merged with the
                 outer metadata,
                 'playlist' / 'multi_video' have each selected entry
                 processed recursively,
                 'compat_list' is the old list format.
    download -- it will also download the videos if true.
    extra_info -- dict of extra values to add to the results (defaults to
                  an empty dict).

    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call: a literal {} default would be one object
    # shared by every invocation.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: return the reference without resolving it
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Outer fields that are set override the inner result, except for
        # the fields that identify the inner result itself.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # Expand a spec such as '1-3,7' into 1, 2, 3, 7
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield item
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Pick the requested 1-based items, dropping out-of-range ones
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Only consume as many entries as the highest requested item
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor fields of the enclosing result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
-
- def _build_format_filter(self, filter_spec):
- " Returns a function to filter the formats according to the filter_spec "
-
- OPERATORS = {
- '<': operator.lt,
- '<=': operator.le,
- '>': operator.gt,
- '>=': operator.ge,
- '=': operator.eq,
- '!=': operator.ne,
- }
- operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
- $
- ''' % '|'.join(map(re.escape, OPERATORS.keys())))
- m = operator_rex.search(filter_spec)
- if m:
- try:
- comparison_value = int(m.group('value'))
- except ValueError:
- comparison_value = parse_filesize(m.group('value'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('value') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid value %r in format specification %r' % (
- m.group('value'), filter_spec))
- op = OPERATORS[m.group('op')]
-
- if not m:
- STR_OPERATORS = {
- '=': operator.eq,
- '!=': operator.ne,
- '^=': lambda attr, value: attr.startswith(value),
- '$=': lambda attr, value: attr.endswith(value),
- '*=': lambda attr, value: value in attr,
- }
- str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
- \s*(?P<value>[a-zA-Z0-9._-]+)
- \s*$
- ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
- m = str_operator_rex.search(filter_spec)
- if m:
- comparison_value = m.group('value')
- op = STR_OPERATORS[m.group('op')]
-
- if not m:
- raise ValueError('Invalid filter specification %r' % filter_spec)
-
- def _filter(f):
- actual_value = f.get(m.group('key'))
- if actual_value is None:
- return m.group('none_inclusive')
- return op(actual_value, comparison_value)
- return _filter
-
- def _default_format_spec(self, info_dict, download=True):
-
- def can_merge():
- merger = FFmpegMergerPP(self)
- return merger.available and merger.can_merge()
-
- def prefer_best():
- if self.params.get('simulate', False):
- return False
- if not download:
- return False
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
- return True
- if info_dict.get('is_live'):
- return True
- if not can_merge():
- return True
- return False
-
- req_format_list = ['bestvideo+bestaudio', 'best']
- if prefer_best():
- req_format_list.reverse()
- return '/'.join(req_format_list)
-
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    format_spec is tokenized with the Python tokenizer, parsed into a tree
    of FormatSelector tuples and turned into a callable. The callable takes
    a context dict (with at least the 'formats' key, and
    'incomplete_formats' for 'best'/'worst') and returns an iterable of the
    selected format dicts.

    Raises SyntaxError (built by syntax_error) for malformed specifications.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError whose message marks the
        # offending column, start[1], with a caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Kinds of nodes in the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Join the token strings up to the closing ']' into one filter
        # expression; falls off the end (returns None) if no ']' is found
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                # Flush the pending joined name before the filter starts
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate names, numbers and unused operators into one string
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive parser: returns a list of FormatSelector nodes. The
        # inside_* flags tell a nested call which operators end it; such
        # an operator is pushed back for the caller to handle.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter with no preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a parsed node (or list of nodes) into a function of ctx
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                # Yield the picks of every selector in the list, in order
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # Return the result of the first alternative that picks anything
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    # 'worst' takes the first list element, otherwise the last
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) we will fallback to best/worst
                    # {video,audio}-only format
                    elif ctx['incomplete_formats']:
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # A known extension selects by 'ext', anything else
                    # is treated as a format_id
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Combine a (video, audio) pair into one format dict;
                # returns None after reporting an error for a bad pair
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                # Formats must be opposite (video+audio)
                if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                    self.report_error(
                        'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                        % (format_1, format_2))
                    return
                # The 'merge_output_format' param overrides the video's extension
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Each side selects on its own deep copy of ctx
                for pair in itertools.product(
                        video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply this node's filters to a copy of ctx, then select
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token,
        # which the parser uses to hand an operator back to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        # Python 2 name of the iterator protocol method
        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
-
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the given info/format dict.

    Starts from a copy of std_headers, applies the dict's own
    'http_headers', sets 'Cookie' from _calc_cookies when there is one and
    adds 'X-Forwarded-For' from '__x_forwarded_for_ip' unless that header
    is already set.
    """
    headers = std_headers.copy()

    custom_headers = info_dict.get('http_headers')
    if custom_headers:
        headers.update(custom_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
-
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar adds to a request for info_dict['url']."""
    request = sanitized_Request(info_dict['url'])
    # The jar writes its matching cookies onto the throwaway request
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
-
def process_video_result(self, info_dict, download=True):
    """Sanitize a video result, select its formats and optionally download.

    info_dict -- extractor result of type 'video'; it is completed in
                 place (thumbnails, display_id, upload_date, subtitles,
                 per-format fields and HTTP headers).
    download -- if true, process_info is called for every selected format.

    Returns info_dict updated with the last selected format, or None when
    only a listing (thumbnails, subtitles or formats) was requested.

    Raises ExtractorError when 'id' or 'title' is missing, when there is
    no usable format, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce a non-string value to compat_str, with a warning
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce non-numeric values of the known numeric fields, with a warning
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Sort ascending so that the last thumbnail is the preferred one
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # The filter above may have dropped every format; fail with the same
    # extractor error instead of an IndexError on formats[0] further down
    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/rg3/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/rg3/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
        # all formats are audio-only
        all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each format is processed on its own copy of the video info
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
-
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Pick the subtitle languages and formats to download.

    Regular subtitles are considered when 'writesubtitles' is set and
    automatic captions when 'writeautomaticsub' is set; regular subtitles
    win for a language present in both. Returns a dict mapping each chosen
    language to one subtitle format dict, or None when nothing was
    requested or nothing is available.
    """
    want_subs = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_subs:
        candidates.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            candidates.setdefault(lang, cap_info)

    if not (want_subs or want_auto) or not candidates:
        return None

    if self.params.get('allsubtitles', False):
        wanted_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        wanted_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        wanted_langs = ['en']
    else:
        # No preference and no English: take the first available language
        wanted_langs = [next(iter(candidates))]

    formats_query = self.params.get('subtitlesformat', 'best')
    preferred_exts = formats_query.split('/') if formats_query else []

    chosen = {}
    for lang in wanted_langs:
        lang_formats = candidates.get(lang)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in preferred_exts:
            if ext == 'best':
                picked = lang_formats[-1]
                break
            same_ext = [sub for sub in lang_formats if sub['ext'] == ext]
            if same_ext:
                picked = same_ext[-1]
                break
        else:
            # None of the preferred extensions matched: use the last format
            picked = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, picked['ext']))
        chosen[lang] = picked
    return chosen
-
- def process_info(self, info_dict):
- """Process a single resolved IE result."""
-
- assert info_dict.get('_type', 'video') == 'video'
-
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
-
- info_dict['fulltitle'] = info_dict['title']
- if len(info_dict['title']) > 200:
- info_dict['title'] = info_dict['title'][:197] + '...'
-
- if 'format' not in info_dict:
- info_dict['format'] = info_dict['ext']
-
- reason = self._match_entry(info_dict, incomplete=False)
- if reason is not None:
- self.to_screen('[download] ' + reason)
- return
-
- self._num_downloads += 1
-
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
-
- # Forced printings
- if self.params.get('forcetitle', False):
- self.to_stdout(info_dict['fulltitle'])
- if self.params.get('forceid', False):
- self.to_stdout(info_dict['id'])
- if self.params.get('forceurl', False):
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
- if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
- self.to_stdout(info_dict['thumbnail'])
- if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
- self.to_stdout(info_dict['description'])
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
- self.to_stdout(formatSeconds(info_dict['duration']))
- if self.params.get('forceformat', False):
- self.to_stdout(info_dict['format'])
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
-
- # Do nothing else if in simulate mode
- if self.params.get('simulate', False):
- return
-
- if filename is None:
- return
-
- def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
-
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
- return
-
- if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
- self.to_screen('[info] Video description is already present')
- elif info_dict.get('description') is None:
- self.report_warning('There\'s no description to write.')
- else:
- try:
- self.to_screen('[info] Writing video description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
- descfile.write(info_dict['description'])
- except (OSError, IOError):
- self.report_error('Cannot write description file ' + descfn)
- return
-
- if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
- self.to_screen('[info] Video annotations are already present')
- else:
- try:
- self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
- annofile.write(info_dict['annotations'])
- except (KeyError, TypeError):
- self.report_warning('There are no annotations to write.')
- except (OSError, IOError):
- self.report_error('Cannot write annotations file: ' + annofn)
- return
-
- subtitles_are_requested = any([self.params.get('writesubtitles', False),
- self.params.get('writeautomaticsub')])
-
- if subtitles_are_requested and info_dict.get('requested_subtitles'):
- # subtitles download errors are already managed as troubles in relevant IE
- # that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
- for sub_lang, sub_info in subtitles.items():
- sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
- else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- if sub_info.get('data') is not None:
- try:
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_info['data'])
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
- else:
- try:
- sub_data = ie._request_webpage(
- sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
- subfile.write(sub_data)
- except (ExtractorError, IOError, OSError, ValueError) as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err)))
- continue
-
- if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
- else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
- try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
- except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
- return
-
- self._write_thumbnails(info_dict, filename)
-
- if not self.params.get('skip_download', False):
- try:
- def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
-
- if info_dict.get('requested_formats') is not None:
- downloaded = []
- success = True
- merger = FFmpegMergerPP(self)
- if not merger.available:
- postprocessors = []
- self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged.')
- else:
- postprocessors = [merger]
-
- def compatible_formats(formats):
- video, audio = formats
- # Check extension
- video_ext, audio_ext = video.get('ext'), audio.get('ext')
- if video_ext and audio_ext:
- COMPATIBLE_EXTS = (
- ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
- ('webm')
- )
- for exts in COMPATIBLE_EXTS:
- if video_ext in exts and audio_ext in exts:
- return True
- # TODO: Check acodec/vcodec
- return False
-
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
- requested_formats = info_dict['requested_formats']
- if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
- info_dict['ext'] = 'mkv'
- self.report_warning(
- 'Requested formats are incompatible for merge and will be merged into mkv.')
- # Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- if os.path.exists(encodeFilename(filename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
- else:
- for f in requested_formats:
- new_info = dict(info_dict)
- new_info.update(f)
- fname = prepend_extension(
- self.prepare_filename(new_info),
- 'f%s' % f['format_id'], new_info['ext'])
- if not ensure_dir_exists(fname):
- return
- downloaded.append(fname)
- partial_success = dl(fname, new_info)
- success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
- else:
- # Just a single file
- success = dl(filename, info_dict)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_error('unable to download video data: %s' % error_to_compat_str(err))
- return
- except (OSError, IOError) as err:
- raise UnavailableVideoError(err)
- except (ContentTooShortError, ) as err:
- self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
- return
-
- if success and filename != '-':
- # Fixup content
- fixup_policy = self.params.get('fixup')
- if fixup_policy is None:
- fixup_policy = 'detect_or_warn'
-
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
-
- stretched_ratio = info_dict.get('stretched_ratio')
- if stretched_ratio is not None and stretched_ratio != 1:
- if fixup_policy == 'warn':
- self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
- info_dict['id'], stretched_ratio))
- elif fixup_policy == 'detect_or_warn':
- stretched_pp = FFmpegFixupStretchedPP(self)
- if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(stretched_pp)
- else:
- self.report_warning(
- '%s: Non-uniform pixel ratio (%s). %s'
- % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('requested_formats') is None and
- info_dict.get('container') == 'm4a_dash'):
- if fixup_policy == 'warn':
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container.'
- % info_dict['id'])
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM4aPP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('protocol') == 'm3u8_native' or
- info_dict.get('protocol') == 'm3u8' and
- self.params.get('hls_prefer_native')):
- if fixup_policy == 'warn':
- self.report_warning('%s: malformed AAC bitstream detected.' % (
- info_dict['id']))
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM3u8PP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: malformed AAC bitstream detected. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- try:
- self.post_process(filename, info_dict)
- except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
- return
- self.record_download_archive(info_dict)
-
- def download(self, url_list):
- """Download a given list of URLs."""
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- if (len(url_list) > 1 and
- outtmpl != '-' and
- '%' not in outtmpl and
- self.params.get('max_downloads') != 1):
- raise SameFileError(outtmpl)
-
- for url in url_list:
- try:
- # It also downloads the videos
- res = self.extract_info(
- url, force_generic_extractor=self.params.get('force_generic_extractor', False))
- except UnavailableVideoError:
- self.report_error('unable to download video')
- except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached.')
- raise
- else:
- if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
-
- return self._download_retcode
-
- def download_with_info_file(self, info_filename):
- with contextlib.closing(fileinput.FileInput(
- [info_filename], mode='r',
- openhook=fileinput.hook_encoded('utf-8'))) as f:
- # FileInput doesn't have a read method, we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
- try:
- self.process_ie_result(info, download=True)
- except DownloadError:
- webpage_url = info.get('webpage_url')
- if webpage_url is not None:
- self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
- return self.download([webpage_url])
- else:
- raise
- return self._download_retcode
-
- @staticmethod
- def filter_requested_info(info_dict):
- return dict(
- (k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
-
- def post_process(self, filename, ie_info):
- """Run all the postprocessors on the given file."""
- info = dict(ie_info)
- info['filepath'] = filename
- pps_chain = []
- if ie_info.get('__postprocessors') is not None:
- pps_chain.extend(ie_info['__postprocessors'])
- pps_chain.extend(self._pps)
- for pp in pps_chain:
- files_to_delete = []
- try:
- files_to_delete, info = pp.run(info)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if files_to_delete and not self.params.get('keepvideo', False):
- for old_filename in files_to_delete:
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
-
- def _make_archive_id(self, info_dict):
- # Future-proof against any change in case
- # and backwards compatibility with prior versions
- extractor = info_dict.get('extractor_key')
- if extractor is None:
- if 'id' in info_dict:
- extractor = info_dict.get('ie_key') # key in a playlist
- if extractor is None:
- return None # Incomplete video information
- return extractor.lower() + ' ' + info_dict['id']
-
- def in_download_archive(self, info_dict):
- fn = self.params.get('download_archive')
- if fn is None:
- return False
-
- vid_id = self._make_archive_id(info_dict)
- if vid_id is None:
- return False # Incomplete video information
-
- try:
- with locked_file(fn, 'r', encoding='utf-8') as archive_file:
- for line in archive_file:
- if line.strip() == vid_id:
- return True
- except IOError as ioe:
- if ioe.errno != errno.ENOENT:
- raise
- return False
-
- def record_download_archive(self, info_dict):
- fn = self.params.get('download_archive')
- if fn is None:
- return
- vid_id = self._make_archive_id(info_dict)
- assert vid_id
- with locked_file(fn, 'a', encoding='utf-8') as archive_file:
- archive_file.write(vid_id + '\n')
-
- @staticmethod
- def format_resolution(format, default='unknown'):
- if format.get('vcodec') == 'none':
- return 'audio only'
- if format.get('resolution') is not None:
- return format['resolution']
- if format.get('height') is not None:
- if format.get('width') is not None:
- res = '%sx%s' % (format['width'], format['height'])
- else:
- res = '%sp' % format['height']
- elif format.get('width') is not None:
- res = '%dx?' % format['width']
- else:
- res = default
- return res
-
- def _format_note(self, fdict):
- res = ''
- if fdict.get('ext') in ['f4f', 'f4m']:
- res += '(unsupported) '
- if fdict.get('language'):
- if res:
- res += ' '
- res += '[%s] ' % fdict['language']
- if fdict.get('format_note') is not None:
- res += fdict['format_note'] + ' '
- if fdict.get('tbr') is not None:
- res += '%4dk ' % fdict['tbr']
- if fdict.get('container') is not None:
- if res:
- res += ', '
- res += '%s container' % fdict['container']
- if (fdict.get('vcodec') is not None and
- fdict.get('vcodec') != 'none'):
- if res:
- res += ', '
- res += fdict['vcodec']
- if fdict.get('vbr') is not None:
- res += '@'
- elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
- res += 'video@'
- if fdict.get('vbr') is not None:
- res += '%4dk' % fdict['vbr']
- if fdict.get('fps') is not None:
- if res:
- res += ', '
- res += '%sfps' % fdict['fps']
- if fdict.get('acodec') is not None:
- if res:
- res += ', '
- if fdict['acodec'] == 'none':
- res += 'video only'
- else:
- res += '%-5s' % fdict['acodec']
- elif fdict.get('abr') is not None:
- if res:
- res += ', '
- res += 'audio'
- if fdict.get('abr') is not None:
- res += '@%3dk' % fdict['abr']
- if fdict.get('asr') is not None:
- res += ' (%5dHz)' % fdict['asr']
- if fdict.get('filesize') is not None:
- if res:
- res += ', '
- res += format_bytes(fdict['filesize'])
- elif fdict.get('filesize_approx') is not None:
- if res:
- res += ', '
- res += '~' + format_bytes(fdict['filesize_approx'])
- return res
-
- def list_formats(self, info_dict):
- formats = info_dict.get('formats', [info_dict])
- table = [
- [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
- header_line = ['format code', 'extension', 'resolution', 'note']
- self.to_screen(
- '[info] Available formats for %s:\n%s' %
- (info_dict['id'], render_table(header_line, table)))
-
- def list_thumbnails(self, info_dict):
- thumbnails = info_dict.get('thumbnails')
- if not thumbnails:
- self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
- return
-
- self.to_screen(
- '[info] Thumbnails for %s:' % info_dict['id'])
- self.to_screen(render_table(
- ['ID', 'width', 'height', 'URL'],
- [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
-
- def list_subtitles(self, video_id, subtitles, name='subtitles'):
- if not subtitles:
- self.to_screen('%s has no %s' % (video_id, name))
- return
- self.to_screen(
- 'Available %s for %s:' % (name, video_id))
- self.to_screen(render_table(
- ['Language', 'formats'],
- [[lang, ', '.join(f['ext'] for f in reversed(formats))]
- for lang, formats in subtitles.items()]))
-
- def urlopen(self, req):
- """ Start an HTTP download """
- if isinstance(req, compat_basestring):
- req = sanitized_Request(req)
- return self._opener.open(req, timeout=self._socket_timeout)
-
- def print_debug_header(self):
- if not self.params.get('verbose'):
- return
-
- if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
- self.report_warning(
- 'Your Python is broken! Update to a newer and supported version')
-
- stdout_encoding = getattr(
- sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
- encoding_str = (
- '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
- locale.getpreferredencoding(),
- sys.getfilesystemencoding(),
- stdout_encoding,
- self.get_encoding()))
- write_string(encoding_str, encoding=None)
-
- self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
- if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
- try:
- sp = subprocess.Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
- except Exception:
- try:
- sys.exc_clear()
- except Exception:
- pass
-
- def python_implementation():
- impl_name = platform.python_implementation()
- if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
- return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
- return impl_name
-
- self._write_string('[debug] Python version %s (%s) - %s\n' % (
- platform.python_version(), python_implementation(),
- platform_name()))
-
- exe_versions = FFmpegPostProcessor.get_versions(self)
- exe_versions['rtmpdump'] = rtmpdump_version()
- exe_versions['phantomjs'] = PhantomJSwrapper._version()
- exe_str = ', '.join(
- '%s %s' % (exe, v)
- for exe, v in sorted(exe_versions.items())
- if v
- )
- if not exe_str:
- exe_str = 'none'
- self._write_string('[debug] exe versions: %s\n' % exe_str)
-
- proxy_map = {}
- for handler in self._opener.handlers:
- if hasattr(handler, 'proxies'):
- proxy_map.update(handler.proxies)
- self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
-
- if self.params.get('call_home', False):
- ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- self._write_string('[debug] Public IP address: %s\n' % ipaddr)
- latest_version = self.urlopen(
- 'https://yt-dl.org/latest/version').read().decode('utf-8')
- if version_tuple(latest_version) > version_tuple(__version__):
- self.report_warning(
- 'You are using an outdated version (newest version: %s)! '
- 'See https://yt-dl.org/update if you need help updating.' %
- latest_version)
-
- def _setup_opener(self):
- timeout_val = self.params.get('socket_timeout')
- self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
-
- opts_cookiefile = self.params.get('cookiefile')
- opts_proxy = self.params.get('proxy')
-
- if opts_cookiefile is None:
- self.cookiejar = compat_cookiejar.CookieJar()
- else:
- opts_cookiefile = expand_path(opts_cookiefile)
- self.cookiejar = compat_cookiejar.MozillaCookieJar(
- opts_cookiefile)
- if os.access(opts_cookiefile, os.R_OK):
- self.cookiejar.load()
-
- cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
- if opts_proxy is not None:
- if opts_proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts_proxy, 'https': opts_proxy}
- else:
- proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
- if 'http' in proxies and 'https' not in proxies:
- proxies['https'] = proxies['http']
- proxy_handler = PerRequestProxyHandler(proxies)
-
- debuglevel = 1 if self.params.get('debug_printtraffic') else 0
- https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
- ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
- data_handler = compat_urllib_request_DataHandler()
-
- # When passing our own FileHandler instance, build_opener won't add the
- # default FileHandler and allows us to disable the file protocol, which
- # can be used for malicious purposes (see
- # https://github.com/rg3/youtube-dl/issues/8227)
- file_handler = compat_urllib_request.FileHandler()
-
- def file_open(*args, **kwargs):
- raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
- file_handler.file_open = file_open
-
- opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
-
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
- opener.addheaders = []
- self._opener = opener
-
- def encode(self, s):
- if isinstance(s, bytes):
- return s # Already encoded
-
- try:
- return s.encode(self.get_encoding())
- except UnicodeEncodeError as err:
- err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
- raise
-
- def get_encoding(self):
- encoding = self.params.get('encoding')
- if encoding is None:
- encoding = preferredencoding()
- return encoding
-
- def _write_thumbnails(self, info_dict, filename):
- if self.params.get('writethumbnail', False):
- thumbnails = info_dict.get('thumbnails')
- if thumbnails:
- thumbnails = [thumbnails[-1]]
- elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
- else:
- return
-
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
-
- for t in thumbnails:
- thumb_ext = determine_ext(t['url'], 'jpg')
- suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
- thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
-
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail %sis already present' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
- else:
- self.to_screen('[%s] %s: Downloading thumbnail %s...' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
- try:
- uf = self.urlopen(t['url'])
- with open(encodeFilename(thumb_filename), 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (t['url'], error_to_compat_str(err)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
deleted file mode 100644
index ba435ea..0000000
--- a/youtube_dl/__init__.py
+++ /dev/null
@@ -1,481 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-__license__ = 'Public Domain'
-
-import codecs
-import io
-import os
-import random
-import sys
-
-
-from .options import (
- parseOpts,
-)
-from .compat import (
- compat_getpass,
- compat_shlex_split,
- workaround_optparse_bug9161,
-)
-from .utils import (
- DateRange,
- decodeOption,
- DEFAULT_OUTTMPL,
- DownloadError,
- expand_path,
- match_filter_func,
- MaxDownloadsReached,
- preferredencoding,
- read_batch_urls,
- SameFileError,
- setproctitle,
- std_headers,
- write_string,
- render_table,
-)
-from .update import update_self
-from .downloader import (
- FileDownloader,
-)
-from .extractor import gen_extractors, list_extractors
-from .extractor.adobepass import MSO_INFO
-from .YoutubeDL import YoutubeDL
-
-
-def _real_main(argv=None):
- # Compatibility fixes for Windows
- if sys.platform == 'win32':
- # https://github.com/rg3/youtube-dl/issues/820
- codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
-
- workaround_optparse_bug9161()
-
- setproctitle('youtube-dl')
-
- parser, opts, args = parseOpts(argv)
-
- # Set user agent
- if opts.user_agent is not None:
- std_headers['User-Agent'] = opts.user_agent
-
- # Set referer
- if opts.referer is not None:
- std_headers['Referer'] = opts.referer
-
- # Custom HTTP headers
- if opts.headers is not None:
- for h in opts.headers:
- if ':' not in h:
- parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
- key, value = h.split(':', 1)
- if opts.verbose:
- write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
- std_headers[key] = value
-
- # Dump user agent
- if opts.dump_user_agent:
- write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
- sys.exit(0)
-
- # Batch file verification
- batch_urls = []
- if opts.batchfile is not None:
- try:
- if opts.batchfile == '-':
- batchfd = sys.stdin
- else:
- batchfd = io.open(
- expand_path(opts.batchfile),
- 'r', encoding='utf-8', errors='ignore')
- batch_urls = read_batch_urls(batchfd)
- if opts.verbose:
- write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
- except IOError:
- sys.exit('ERROR: batch file could not be read')
- all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
- _enc = preferredencoding()
- all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
-
- if opts.list_extractors:
- for ie in list_extractors(opts.age_limit):
- write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
- matchedUrls = [url for url in all_urls if ie.suitable(url)]
- for mu in matchedUrls:
- write_string(' ' + mu + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.list_extractor_descriptions:
- for ie in list_extractors(opts.age_limit):
- if not ie._WORKING:
- continue
- desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
- if desc is False:
- continue
- if hasattr(ie, 'SEARCH_KEY'):
- _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
- _COUNTS = ('', '5', '10', 'all')
- desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
- write_string(desc + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.ap_list_mso:
- table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
- write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
- sys.exit(0)
-
- # Conflicting, missing and erroneous options
- if opts.usenetrc and (opts.username is not None or opts.password is not None):
- parser.error('using .netrc conflicts with giving username/password')
- if opts.password is not None and opts.username is None:
- parser.error('account username missing\n')
- if opts.ap_password is not None and opts.ap_username is None:
- parser.error('TV Provider account username missing\n')
- if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
- parser.error('using output template conflicts with using title, video ID or auto number')
- if opts.autonumber_size is not None:
- if opts.autonumber_size <= 0:
- parser.error('auto number size must be positive')
- if opts.autonumber_start is not None:
- if opts.autonumber_start < 0:
- parser.error('auto number start must be positive or 0')
- if opts.usetitle and opts.useid:
- parser.error('using title conflicts with using video ID')
- if opts.username is not None and opts.password is None:
- opts.password = compat_getpass('Type account password and press [Return]: ')
- if opts.ap_username is not None and opts.ap_password is None:
- opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
- if opts.ratelimit is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
- if numeric_limit is None:
- parser.error('invalid rate limit specified')
- opts.ratelimit = numeric_limit
- if opts.min_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
- if numeric_limit is None:
- parser.error('invalid min_filesize specified')
- opts.min_filesize = numeric_limit
- if opts.max_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
- if numeric_limit is None:
- parser.error('invalid max_filesize specified')
- opts.max_filesize = numeric_limit
- if opts.sleep_interval is not None:
- if opts.sleep_interval < 0:
- parser.error('sleep interval must be positive or 0')
- if opts.max_sleep_interval is not None:
- if opts.max_sleep_interval < 0:
- parser.error('max sleep interval must be positive or 0')
- if opts.max_sleep_interval < opts.sleep_interval:
- parser.error('max sleep interval must be greater than or equal to min sleep interval')
- else:
- opts.max_sleep_interval = opts.sleep_interval
- if opts.ap_mso and opts.ap_mso not in MSO_INFO:
- parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
-
- def parse_retries(retries):
- if retries in ('inf', 'infinite'):
- parsed_retries = float('inf')
- else:
- try:
- parsed_retries = int(retries)
- except (TypeError, ValueError):
- parser.error('invalid retry count specified')
- return parsed_retries
- if opts.retries is not None:
- opts.retries = parse_retries(opts.retries)
- if opts.fragment_retries is not None:
- opts.fragment_retries = parse_retries(opts.fragment_retries)
- if opts.buffersize is not None:
- numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
- if numeric_buffersize is None:
- parser.error('invalid buffer size specified')
- opts.buffersize = numeric_buffersize
- if opts.http_chunk_size is not None:
- numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
- if not numeric_chunksize:
- parser.error('invalid http chunk size specified')
- opts.http_chunk_size = numeric_chunksize
- if opts.playliststart <= 0:
- raise ValueError('Playlist start must be positive')
- if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
- raise ValueError('Playlist end must be greater than playlist start')
- if opts.extractaudio:
- if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
- parser.error('invalid audio format specified')
- if opts.audioquality:
- opts.audioquality = opts.audioquality.strip('k').strip('K')
- if not opts.audioquality.isdigit():
- parser.error('invalid audio quality specified')
- if opts.recodevideo is not None:
- if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
- parser.error('invalid video recode format specified')
- if opts.convertsubtitles is not None:
- if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
- parser.error('invalid subtitle format specified')
-
- if opts.date is not None:
- date = DateRange.day(opts.date)
- else:
- date = DateRange(opts.dateafter, opts.datebefore)
-
- # Do not download videos when there are audio-only formats
- if opts.extractaudio and not opts.keepvideo and opts.format is None:
- opts.format = 'bestaudio/best'
-
- # --all-sub automatically sets --write-sub if --write-auto-sub is not given
- # this was the old behaviour if only --all-sub was given.
- if opts.allsubtitles and not opts.writeautomaticsub:
- opts.writesubtitles = True
-
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
- (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
- (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
- (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
- (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
- (opts.useid and '%(id)s.%(ext)s') or
- (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
- DEFAULT_OUTTMPL)
- if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
- parser.error('Cannot download a video and extract audio into the same'
- ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
- ' template'.format(outtmpl))
-
- any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
- any_printing = opts.print_json
- download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
-
- # PostProcessors
- postprocessors = []
- if opts.metafromtitle:
- postprocessors.append({
- 'key': 'MetadataFromTitle',
- 'titleformat': opts.metafromtitle
- })
- if opts.extractaudio:
- postprocessors.append({
- 'key': 'FFmpegExtractAudio',
- 'preferredcodec': opts.audioformat,
- 'preferredquality': opts.audioquality,
- 'nopostoverwrites': opts.nopostoverwrites,
- })
- if opts.recodevideo:
- postprocessors.append({
- 'key': 'FFmpegVideoConvertor',
- 'preferedformat': opts.recodevideo,
- })
- # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
- # FFmpegExtractAudioPP as containers before conversion may not support
- # metadata (3gp, webm, etc.)
- # And this post-processor should be placed before other metadata
- # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
- # extra metadata. By default ffmpeg preserves metadata applicable for both
- # source and target containers. From this point the container won't change,
- # so metadata can be added here.
- if opts.addmetadata:
- postprocessors.append({'key': 'FFmpegMetadata'})
- if opts.convertsubtitles:
- postprocessors.append({
- 'key': 'FFmpegSubtitlesConvertor',
- 'format': opts.convertsubtitles,
- })
- if opts.embedsubtitles:
- postprocessors.append({
- 'key': 'FFmpegEmbedSubtitle',
- })
- if opts.embedthumbnail:
- already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
- postprocessors.append({
- 'key': 'EmbedThumbnail',
- 'already_have_thumbnail': already_have_thumbnail
- })
- if not already_have_thumbnail:
- opts.writethumbnail = True
- # XAttrMetadataPP should be run after post-processors that may change file
- # contents
- if opts.xattrs:
- postprocessors.append({'key': 'XAttrMetadata'})
- # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
- # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
- if opts.exec_cmd:
- postprocessors.append({
- 'key': 'ExecAfterDownload',
- 'exec_cmd': opts.exec_cmd,
- })
- external_downloader_args = None
- if opts.external_downloader_args:
- external_downloader_args = compat_shlex_split(opts.external_downloader_args)
- postprocessor_args = None
- if opts.postprocessor_args:
- postprocessor_args = compat_shlex_split(opts.postprocessor_args)
- match_filter = (
- None if opts.match_filter is None
- else match_filter_func(opts.match_filter))
-
- ydl_opts = {
- 'usenetrc': opts.usenetrc,
- 'username': opts.username,
- 'password': opts.password,
- 'twofactor': opts.twofactor,
- 'videopassword': opts.videopassword,
- 'ap_mso': opts.ap_mso,
- 'ap_username': opts.ap_username,
- 'ap_password': opts.ap_password,
- 'quiet': (opts.quiet or any_getting or any_printing),
- 'no_warnings': opts.no_warnings,
- 'forceurl': opts.geturl,
- 'forcetitle': opts.gettitle,
- 'forceid': opts.getid,
- 'forcethumbnail': opts.getthumbnail,
- 'forcedescription': opts.getdescription,
- 'forceduration': opts.getduration,
- 'forcefilename': opts.getfilename,
- 'forceformat': opts.getformat,
- 'forcejson': opts.dumpjson or opts.print_json,
- 'dump_single_json': opts.dump_single_json,
- 'simulate': opts.simulate or any_getting,
- 'skip_download': opts.skip_download,
- 'format': opts.format,
- 'listformats': opts.listformats,
- 'outtmpl': outtmpl,
- 'autonumber_size': opts.autonumber_size,
- 'autonumber_start': opts.autonumber_start,
- 'restrictfilenames': opts.restrictfilenames,
- 'ignoreerrors': opts.ignoreerrors,
- 'force_generic_extractor': opts.force_generic_extractor,
- 'ratelimit': opts.ratelimit,
- 'nooverwrites': opts.nooverwrites,
- 'retries': opts.retries,
- 'fragment_retries': opts.fragment_retries,
- 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
- 'keep_fragments': opts.keep_fragments,
- 'buffersize': opts.buffersize,
- 'noresizebuffer': opts.noresizebuffer,
- 'http_chunk_size': opts.http_chunk_size,
- 'continuedl': opts.continue_dl,
- 'noprogress': opts.noprogress,
- 'progress_with_newline': opts.progress_with_newline,
- 'playliststart': opts.playliststart,
- 'playlistend': opts.playlistend,
- 'playlistreverse': opts.playlist_reverse,
- 'playlistrandom': opts.playlist_random,
- 'noplaylist': opts.noplaylist,
- 'logtostderr': opts.outtmpl == '-',
- 'consoletitle': opts.consoletitle,
- 'nopart': opts.nopart,
- 'updatetime': opts.updatetime,
- 'writedescription': opts.writedescription,
- 'writeannotations': opts.writeannotations,
- 'writeinfojson': opts.writeinfojson,
- 'writethumbnail': opts.writethumbnail,
- 'write_all_thumbnails': opts.write_all_thumbnails,
- 'writesubtitles': opts.writesubtitles,
- 'writeautomaticsub': opts.writeautomaticsub,
- 'allsubtitles': opts.allsubtitles,
- 'listsubtitles': opts.listsubtitles,
- 'subtitlesformat': opts.subtitlesformat,
- 'subtitleslangs': opts.subtitleslangs,
- 'matchtitle': decodeOption(opts.matchtitle),
- 'rejecttitle': decodeOption(opts.rejecttitle),
- 'max_downloads': opts.max_downloads,
- 'prefer_free_formats': opts.prefer_free_formats,
- 'verbose': opts.verbose,
- 'dump_intermediate_pages': opts.dump_intermediate_pages,
- 'write_pages': opts.write_pages,
- 'test': opts.test,
- 'keepvideo': opts.keepvideo,
- 'min_filesize': opts.min_filesize,
- 'max_filesize': opts.max_filesize,
- 'min_views': opts.min_views,
- 'max_views': opts.max_views,
- 'daterange': date,
- 'cachedir': opts.cachedir,
- 'youtube_print_sig_code': opts.youtube_print_sig_code,
- 'age_limit': opts.age_limit,
- 'download_archive': download_archive_fn,
- 'cookiefile': opts.cookiefile,
- 'nocheckcertificate': opts.no_check_certificate,
- 'prefer_insecure': opts.prefer_insecure,
- 'proxy': opts.proxy,
- 'socket_timeout': opts.socket_timeout,
- 'bidi_workaround': opts.bidi_workaround,
- 'debug_printtraffic': opts.debug_printtraffic,
- 'prefer_ffmpeg': opts.prefer_ffmpeg,
- 'include_ads': opts.include_ads,
- 'default_search': opts.default_search,
- 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
- 'encoding': opts.encoding,
- 'extract_flat': opts.extract_flat,
- 'mark_watched': opts.mark_watched,
- 'merge_output_format': opts.merge_output_format,
- 'postprocessors': postprocessors,
- 'fixup': opts.fixup,
- 'source_address': opts.source_address,
- 'call_home': opts.call_home,
- 'sleep_interval': opts.sleep_interval,
- 'max_sleep_interval': opts.max_sleep_interval,
- 'external_downloader': opts.external_downloader,
- 'list_thumbnails': opts.list_thumbnails,
- 'playlist_items': opts.playlist_items,
- 'xattr_set_filesize': opts.xattr_set_filesize,
- 'match_filter': match_filter,
- 'no_color': opts.no_color,
- 'ffmpeg_location': opts.ffmpeg_location,
- 'hls_prefer_native': opts.hls_prefer_native,
- 'hls_use_mpegts': opts.hls_use_mpegts,
- 'external_downloader_args': external_downloader_args,
- 'postprocessor_args': postprocessor_args,
- 'cn_verification_proxy': opts.cn_verification_proxy,
- 'geo_verification_proxy': opts.geo_verification_proxy,
- 'config_location': opts.config_location,
- 'geo_bypass': opts.geo_bypass,
- 'geo_bypass_country': opts.geo_bypass_country,
- 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
- # just for deprecation check
- 'autonumber': opts.autonumber if opts.autonumber is True else None,
- 'usetitle': opts.usetitle if opts.usetitle is True else None,
- }
-
- with YoutubeDL(ydl_opts) as ydl:
- # Update version
- if opts.update_self:
- update_self(ydl.to_screen, opts.verbose, ydl._opener)
-
- # Remove cache dir
- if opts.rm_cachedir:
- ydl.cache.remove()
-
- # Maybe do nothing
- if (len(all_urls) < 1) and (opts.load_info_filename is None):
- if opts.update_self or opts.rm_cachedir:
- sys.exit()
-
- ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
- parser.error(
- 'You must provide at least one URL.\n'
- 'Type youtube-dl --help to see a list of all options.')
-
- try:
- if opts.load_info_filename is not None:
- retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
- else:
- retcode = ydl.download(all_urls)
- except MaxDownloadsReached:
- ydl.to_screen('--max-download limit reached, aborting.')
- retcode = 101
-
- sys.exit(retcode)
-
-
-def main(argv=None):
- try:
- _real_main(argv)
- except DownloadError:
- sys.exit(1)
- except SameFileError:
- sys.exit('ERROR: fixed output name but more than one file to download')
- except KeyboardInterrupt:
- sys.exit('\nERROR: Interrupted by user')
-
-
-__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py
deleted file mode 100644
index 138f5fb..0000000
--- a/youtube_dl/__main__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-from __future__ import unicode_literals
-
-# Execute with
-# $ python youtube_dl/__main__.py (2.6+)
-# $ python -m youtube_dl (2.7+)
-
-import sys
-
-if __package__ is None and not hasattr(sys, 'frozen'):
- # direct call of __main__.py
- import os.path
- path = os.path.realpath(os.path.abspath(__file__))
- sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
-
-import youtube_dl
-
-if __name__ == '__main__':
- youtube_dl.main()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
deleted file mode 100644
index 461bb6d..0000000
--- a/youtube_dl/aes.py
+++ /dev/null
@@ -1,361 +0,0 @@
-from __future__ import unicode_literals
-
-from math import ceil
-
-from .compat import compat_b64decode
-from .utils import bytes_to_intlist, intlist_to_bytes
-
-BLOCK_SIZE_BYTES = 16
-
-
-def aes_ctr_decrypt(data, key, counter):
- """
- Decrypt with aes in counter mode
-
- @param {int[]} data cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
- returns the next counter block
- @returns {int[]} decrypted data
- """
- expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-
- decrypted_data = []
- for i in range(block_count):
- counter_block = counter.next_value()
- block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- block += [0] * (BLOCK_SIZE_BYTES - len(block))
-
- cipher_counter_block = aes_encrypt(counter_block, expanded_key)
- decrypted_data += xor(block, cipher_counter_block)
- decrypted_data = decrypted_data[:len(data)]
-
- return decrypted_data
-
-
-def aes_cbc_decrypt(data, key, iv):
- """
- Decrypt with aes in CBC mode
-
- @param {int[]} data cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {int[]} decrypted data
- """
- expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-
- decrypted_data = []
- previous_cipher_block = iv
- for i in range(block_count):
- block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- block += [0] * (BLOCK_SIZE_BYTES - len(block))
-
- decrypted_block = aes_decrypt(block, expanded_key)
- decrypted_data += xor(decrypted_block, previous_cipher_block)
- previous_cipher_block = block
- decrypted_data = decrypted_data[:len(data)]
-
- return decrypted_data
-
-
-def aes_cbc_encrypt(data, key, iv):
- """
- Encrypt with aes in CBC mode. Using PKCS#7 padding
-
- @param {int[]} data cleartext
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {int[]} encrypted data
- """
- expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-
- encrypted_data = []
- previous_cipher_block = iv
- for i in range(block_count):
- block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- remaining_length = BLOCK_SIZE_BYTES - len(block)
- block += [remaining_length] * remaining_length
- mixed_block = xor(block, previous_cipher_block)
-
- encrypted_block = aes_encrypt(mixed_block, expanded_key)
- encrypted_data += encrypted_block
-
- previous_cipher_block = encrypted_block
-
- return encrypted_data
-
-
-def key_expansion(data):
- """
- Generate key schedule
-
- @param {int[]} data 16/24/32-Byte cipher key
- @returns {int[]} 176/208/240-Byte expanded key
- """
- data = data[:] # copy
- rcon_iteration = 1
- key_size_bytes = len(data)
- expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
-
- while len(data) < expanded_key_size_bytes:
- temp = data[-4:]
- temp = key_schedule_core(temp, rcon_iteration)
- rcon_iteration += 1
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- for _ in range(3):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- if key_size_bytes == 32:
- temp = data[-4:]
- temp = sub_bytes(temp)
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
- data = data[:expanded_key_size_bytes]
-
- return data
-
-
-def aes_encrypt(data, expanded_key):
- """
- Encrypt one block with aes
-
- @param {int[]} data 16-Byte state
- @param {int[]} expanded_key 176/208/240-Byte expanded key
- @returns {int[]} 16-Byte cipher
- """
- rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
- data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
- for i in range(1, rounds + 1):
- data = sub_bytes(data)
- data = shift_rows(data)
- if i != rounds:
- data = mix_columns(data)
- data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
-
- return data
-
-
-def aes_decrypt(data, expanded_key):
- """
- Decrypt one block with aes
-
- @param {int[]} data 16-Byte cipher
- @param {int[]} expanded_key 176/208/240-Byte expanded key
- @returns {int[]} 16-Byte state
- """
- rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
- for i in range(rounds, 0, -1):
- data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
- if i != rounds:
- data = mix_columns_inv(data)
- data = shift_rows_inv(data)
- data = sub_bytes_inv(data)
- data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-
- return data
-
-
-def aes_decrypt_text(data, password, key_size_bytes):
- """
- Decrypt text
- - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
- - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
- with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
- - Mode of operation is 'counter'
-
- @param {str} data Base64 encoded string
- @param {str,unicode} password Password (will be encoded with utf-8)
- @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
- @returns {str} Decrypted data
- """
- NONCE_LENGTH_BYTES = 8
-
- data = bytes_to_intlist(compat_b64decode(data))
- password = bytes_to_intlist(password.encode('utf-8'))
-
- key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
- key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
-
- nonce = data[:NONCE_LENGTH_BYTES]
- cipher = data[NONCE_LENGTH_BYTES:]
-
- class Counter(object):
- __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
-
- decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
- plaintext = intlist_to_bytes(decrypted_data)
-
- return plaintext
-
-
-RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
-SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
- 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
- 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
- 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
- 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
- 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
- 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
- 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
- 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
- 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
- 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
- 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
- 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
- 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
- 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
- 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
-SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
- 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
- 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
- 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
- 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
- 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
- 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
- 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
- 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
- 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
- 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
- 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
- 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
- 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
- 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
- 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
-MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
- (0x1, 0x2, 0x3, 0x1),
- (0x1, 0x1, 0x2, 0x3),
- (0x3, 0x1, 0x1, 0x2))
-MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
- (0x9, 0xE, 0xB, 0xD),
- (0xD, 0x9, 0xE, 0xB),
- (0xB, 0xD, 0x9, 0xE))
-RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
- 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
- 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
- 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
- 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
- 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
- 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
- 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
- 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
- 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
- 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
- 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
- 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
- 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
- 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
- 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
-RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
- 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
- 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
- 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
- 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
- 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
- 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
- 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
- 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
- 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
- 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
- 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
- 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
- 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
- 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
- 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
-
-
-def sub_bytes(data):
- return [SBOX[x] for x in data]
-
-
-def sub_bytes_inv(data):
- return [SBOX_INV[x] for x in data]
-
-
-def rotate(data):
- return data[1:] + [data[0]]
-
-
-def key_schedule_core(data, rcon_iteration):
- data = rotate(data)
- data = sub_bytes(data)
- data[0] = data[0] ^ RCON[rcon_iteration]
-
- return data
-
-
-def xor(data1, data2):
- return [x ^ y for x, y in zip(data1, data2)]
-
-
-def rijndael_mul(a, b):
- if(a == 0 or b == 0):
- return 0
- return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
-
-
-def mix_column(data, matrix):
- data_mixed = []
- for row in range(4):
- mixed = 0
- for column in range(4):
- # xor is (+) and (-)
- mixed ^= rijndael_mul(data[column], matrix[row][column])
- data_mixed.append(mixed)
- return data_mixed
-
-
-def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
- data_mixed = []
- for i in range(4):
- column = data[i * 4: (i + 1) * 4]
- data_mixed += mix_column(column, matrix)
- return data_mixed
-
-
-def mix_columns_inv(data):
- return mix_columns(data, MIX_COLUMN_MATRIX_INV)
-
-
-def shift_rows(data):
- data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column + row) & 0b11) * 4 + row])
- return data_shifted
-
-
-def shift_rows_inv(data):
- data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column - row) & 0b11) * 4 + row])
- return data_shifted
-
-
-def inc(data):
- data = data[:] # copy
- for i in range(len(data) - 1, -1, -1):
- if data[i] == 255:
- data[i] = 0
- else:
- data[i] = data[i] + 1
- break
- return data
-
-
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
deleted file mode 100644
index 7bdade1..0000000
--- a/youtube_dl/cache.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import unicode_literals
-
-import errno
-import io
-import json
-import os
-import re
-import shutil
-import traceback
-
-from .compat import compat_getenv
-from .utils import (
- expand_path,
- write_json_file,
-)
-
-
-class Cache(object):
- def __init__(self, ydl):
- self._ydl = ydl
-
- def _get_root_dir(self):
- res = self._ydl.params.get('cachedir')
- if res is None:
- cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
- res = os.path.join(cache_root, 'youtube-dl')
- return expand_path(res)
-
- def _get_cache_fn(self, section, key, dtype):
- assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
- 'invalid section %r' % section
- assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
- return os.path.join(
- self._get_root_dir(), section, '%s.%s' % (key, dtype))
-
- @property
- def enabled(self):
- return self._ydl.params.get('cachedir') is not False
-
- def store(self, section, key, data, dtype='json'):
- assert dtype in ('json',)
-
- if not self.enabled:
- return
-
- fn = self._get_cache_fn(section, key, dtype)
- try:
- try:
- os.makedirs(os.path.dirname(fn))
- except OSError as ose:
- if ose.errno != errno.EEXIST:
- raise
- write_json_file(data, fn)
- except Exception:
- tb = traceback.format_exc()
- self._ydl.report_warning(
- 'Writing cache to %r failed: %s' % (fn, tb))
-
- def load(self, section, key, dtype='json', default=None):
- assert dtype in ('json',)
-
- if not self.enabled:
- return default
-
- cache_fn = self._get_cache_fn(section, key, dtype)
- try:
- try:
- with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
- return json.load(cachef)
- except ValueError:
- try:
- file_size = os.path.getsize(cache_fn)
- except (OSError, IOError) as oe:
- file_size = str(oe)
- self._ydl.report_warning(
- 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
- except IOError:
- pass # No cache available
-
- return default
-
- def remove(self):
- if not self.enabled:
- self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
- return
-
- cachedir = self._get_root_dir()
- if not any((term in cachedir) for term in ('cache', 'tmp')):
- raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
-
- self._ydl.to_screen(
- 'Removing cache dir %s .' % cachedir, skip_eol=True)
- if os.path.exists(cachedir):
- self._ydl.to_screen('.', skip_eol=True)
- shutil.rmtree(cachedir)
- self._ydl.to_screen('.')
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
deleted file mode 100644
index 7b77034..0000000
--- a/youtube_dl/compat.py
+++ /dev/null
@@ -1,3016 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import binascii
-import collections
-import ctypes
-import email
-import getpass
-import io
-import itertools
-import optparse
-import os
-import platform
-import re
-import shlex
-import shutil
-import socket
-import struct
-import subprocess
-import sys
-import xml.etree.ElementTree
-
-
-try:
- import urllib.request as compat_urllib_request
-except ImportError: # Python 2
- import urllib2 as compat_urllib_request
-
-try:
- import urllib.error as compat_urllib_error
-except ImportError: # Python 2
- import urllib2 as compat_urllib_error
-
-try:
- import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
- import urllib as compat_urllib_parse
-
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
-
-try:
- import urllib.response as compat_urllib_response
-except ImportError: # Python 2
- import urllib as compat_urllib_response
-
-try:
- import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
- import cookielib as compat_cookiejar
-
-try:
- import http.cookies as compat_cookies
-except ImportError: # Python 2
- import Cookie as compat_cookies
-
-try:
- import html.entities as compat_html_entities
-except ImportError: # Python 2
- import htmlentitydefs as compat_html_entities
-
-try: # Python >= 3.3
- compat_html_entities_html5 = compat_html_entities.html5
-except AttributeError:
- # Copied from CPython 3.5.1 html/entities.py
- compat_html_entities_html5 = {
- 'Aacute': '\xc1',
- 'aacute': '\xe1',
- 'Aacute;': '\xc1',
- 'aacute;': '\xe1',
- 'Abreve;': '\u0102',
- 'abreve;': '\u0103',
- 'ac;': '\u223e',
- 'acd;': '\u223f',
- 'acE;': '\u223e\u0333',
- 'Acirc': '\xc2',
- 'acirc': '\xe2',
- 'Acirc;': '\xc2',
- 'acirc;': '\xe2',
- 'acute': '\xb4',
- 'acute;': '\xb4',
- 'Acy;': '\u0410',
- 'acy;': '\u0430',
- 'AElig': '\xc6',
- 'aelig': '\xe6',
- 'AElig;': '\xc6',
- 'aelig;': '\xe6',
- 'af;': '\u2061',
- 'Afr;': '\U0001d504',
- 'afr;': '\U0001d51e',
- 'Agrave': '\xc0',
- 'agrave': '\xe0',
- 'Agrave;': '\xc0',
- 'agrave;': '\xe0',
- 'alefsym;': '\u2135',
- 'aleph;': '\u2135',
- 'Alpha;': '\u0391',
- 'alpha;': '\u03b1',
- 'Amacr;': '\u0100',
- 'amacr;': '\u0101',
- 'amalg;': '\u2a3f',
- 'AMP': '&',
- 'amp': '&',
- 'AMP;': '&',
- 'amp;': '&',
- 'And;': '\u2a53',
- 'and;': '\u2227',
- 'andand;': '\u2a55',
- 'andd;': '\u2a5c',
- 'andslope;': '\u2a58',
- 'andv;': '\u2a5a',
- 'ang;': '\u2220',
- 'ange;': '\u29a4',
- 'angle;': '\u2220',
- 'angmsd;': '\u2221',
- 'angmsdaa;': '\u29a8',
- 'angmsdab;': '\u29a9',
- 'angmsdac;': '\u29aa',
- 'angmsdad;': '\u29ab',
- 'angmsdae;': '\u29ac',
- 'angmsdaf;': '\u29ad',
- 'angmsdag;': '\u29ae',
- 'angmsdah;': '\u29af',
- 'angrt;': '\u221f',
- 'angrtvb;': '\u22be',
- 'angrtvbd;': '\u299d',
- 'angsph;': '\u2222',
- 'angst;': '\xc5',
- 'angzarr;': '\u237c',
- 'Aogon;': '\u0104',
- 'aogon;': '\u0105',
- 'Aopf;': '\U0001d538',
- 'aopf;': '\U0001d552',
- 'ap;': '\u2248',
- 'apacir;': '\u2a6f',
- 'apE;': '\u2a70',
- 'ape;': '\u224a',
- 'apid;': '\u224b',
- 'apos;': "'",
- 'ApplyFunction;': '\u2061',
- 'approx;': '\u2248',
- 'approxeq;': '\u224a',
- 'Aring': '\xc5',
- 'aring': '\xe5',
- 'Aring;': '\xc5',
- 'aring;': '\xe5',
- 'Ascr;': '\U0001d49c',
- 'ascr;': '\U0001d4b6',
- 'Assign;': '\u2254',
- 'ast;': '*',
- 'asymp;': '\u2248',
- 'asympeq;': '\u224d',
- 'Atilde': '\xc3',
- 'atilde': '\xe3',
- 'Atilde;': '\xc3',
- 'atilde;': '\xe3',
- 'Auml': '\xc4',
- 'auml': '\xe4',
- 'Auml;': '\xc4',
- 'auml;': '\xe4',
- 'awconint;': '\u2233',
- 'awint;': '\u2a11',
- 'backcong;': '\u224c',
- 'backepsilon;': '\u03f6',
- 'backprime;': '\u2035',
- 'backsim;': '\u223d',
- 'backsimeq;': '\u22cd',
- 'Backslash;': '\u2216',
- 'Barv;': '\u2ae7',
- 'barvee;': '\u22bd',
- 'Barwed;': '\u2306',
- 'barwed;': '\u2305',
- 'barwedge;': '\u2305',
- 'bbrk;': '\u23b5',
- 'bbrktbrk;': '\u23b6',
- 'bcong;': '\u224c',
- 'Bcy;': '\u0411',
- 'bcy;': '\u0431',
- 'bdquo;': '\u201e',
- 'becaus;': '\u2235',
- 'Because;': '\u2235',
- 'because;': '\u2235',
- 'bemptyv;': '\u29b0',
- 'bepsi;': '\u03f6',
- 'bernou;': '\u212c',
- 'Bernoullis;': '\u212c',
- 'Beta;': '\u0392',
- 'beta;': '\u03b2',
- 'beth;': '\u2136',
- 'between;': '\u226c',
- 'Bfr;': '\U0001d505',
- 'bfr;': '\U0001d51f',
- 'bigcap;': '\u22c2',
- 'bigcirc;': '\u25ef',
- 'bigcup;': '\u22c3',
- 'bigodot;': '\u2a00',
- 'bigoplus;': '\u2a01',
- 'bigotimes;': '\u2a02',
- 'bigsqcup;': '\u2a06',
- 'bigstar;': '\u2605',
- 'bigtriangledown;': '\u25bd',
- 'bigtriangleup;': '\u25b3',
- 'biguplus;': '\u2a04',
- 'bigvee;': '\u22c1',
- 'bigwedge;': '\u22c0',
- 'bkarow;': '\u290d',
- 'blacklozenge;': '\u29eb',
- 'blacksquare;': '\u25aa',
- 'blacktriangle;': '\u25b4',
- 'blacktriangledown;': '\u25be',
- 'blacktriangleleft;': '\u25c2',
- 'blacktriangleright;': '\u25b8',
- 'blank;': '\u2423',
- 'blk12;': '\u2592',
- 'blk14;': '\u2591',
- 'blk34;': '\u2593',
- 'block;': '\u2588',
- 'bne;': '=\u20e5',
- 'bnequiv;': '\u2261\u20e5',
- 'bNot;': '\u2aed',
- 'bnot;': '\u2310',
- 'Bopf;': '\U0001d539',
- 'bopf;': '\U0001d553',
- 'bot;': '\u22a5',
- 'bottom;': '\u22a5',
- 'bowtie;': '\u22c8',
- 'boxbox;': '\u29c9',
- 'boxDL;': '\u2557',
- 'boxDl;': '\u2556',
- 'boxdL;': '\u2555',
- 'boxdl;': '\u2510',
- 'boxDR;': '\u2554',
- 'boxDr;': '\u2553',
- 'boxdR;': '\u2552',
- 'boxdr;': '\u250c',
- 'boxH;': '\u2550',
- 'boxh;': '\u2500',
- 'boxHD;': '\u2566',
- 'boxHd;': '\u2564',
- 'boxhD;': '\u2565',
- 'boxhd;': '\u252c',
- 'boxHU;': '\u2569',
- 'boxHu;': '\u2567',
- 'boxhU;': '\u2568',
- 'boxhu;': '\u2534',
- 'boxminus;': '\u229f',
- 'boxplus;': '\u229e',
- 'boxtimes;': '\u22a0',
- 'boxUL;': '\u255d',
- 'boxUl;': '\u255c',
- 'boxuL;': '\u255b',
- 'boxul;': '\u2518',
- 'boxUR;': '\u255a',
- 'boxUr;': '\u2559',
- 'boxuR;': '\u2558',
- 'boxur;': '\u2514',
- 'boxV;': '\u2551',
- 'boxv;': '\u2502',
- 'boxVH;': '\u256c',
- 'boxVh;': '\u256b',
- 'boxvH;': '\u256a',
- 'boxvh;': '\u253c',
- 'boxVL;': '\u2563',
- 'boxVl;': '\u2562',
- 'boxvL;': '\u2561',
- 'boxvl;': '\u2524',
- 'boxVR;': '\u2560',
- 'boxVr;': '\u255f',
- 'boxvR;': '\u255e',
- 'boxvr;': '\u251c',
- 'bprime;': '\u2035',
- 'Breve;': '\u02d8',
- 'breve;': '\u02d8',
- 'brvbar': '\xa6',
- 'brvbar;': '\xa6',
- 'Bscr;': '\u212c',
- 'bscr;': '\U0001d4b7',
- 'bsemi;': '\u204f',
- 'bsim;': '\u223d',
- 'bsime;': '\u22cd',
- 'bsol;': '\\',
- 'bsolb;': '\u29c5',
- 'bsolhsub;': '\u27c8',
- 'bull;': '\u2022',
- 'bullet;': '\u2022',
- 'bump;': '\u224e',
- 'bumpE;': '\u2aae',
- 'bumpe;': '\u224f',
- 'Bumpeq;': '\u224e',
- 'bumpeq;': '\u224f',
- 'Cacute;': '\u0106',
- 'cacute;': '\u0107',
- 'Cap;': '\u22d2',
- 'cap;': '\u2229',
- 'capand;': '\u2a44',
- 'capbrcup;': '\u2a49',
- 'capcap;': '\u2a4b',
- 'capcup;': '\u2a47',
- 'capdot;': '\u2a40',
- 'CapitalDifferentialD;': '\u2145',
- 'caps;': '\u2229\ufe00',
- 'caret;': '\u2041',
- 'caron;': '\u02c7',
- 'Cayleys;': '\u212d',
- 'ccaps;': '\u2a4d',
- 'Ccaron;': '\u010c',
- 'ccaron;': '\u010d',
- 'Ccedil': '\xc7',
- 'ccedil': '\xe7',
- 'Ccedil;': '\xc7',
- 'ccedil;': '\xe7',
- 'Ccirc;': '\u0108',
- 'ccirc;': '\u0109',
- 'Cconint;': '\u2230',
- 'ccups;': '\u2a4c',
- 'ccupssm;': '\u2a50',
- 'Cdot;': '\u010a',
- 'cdot;': '\u010b',
- 'cedil': '\xb8',
- 'cedil;': '\xb8',
- 'Cedilla;': '\xb8',
- 'cemptyv;': '\u29b2',
- 'cent': '\xa2',
- 'cent;': '\xa2',
- 'CenterDot;': '\xb7',
- 'centerdot;': '\xb7',
- 'Cfr;': '\u212d',
- 'cfr;': '\U0001d520',
- 'CHcy;': '\u0427',
- 'chcy;': '\u0447',
- 'check;': '\u2713',
- 'checkmark;': '\u2713',
- 'Chi;': '\u03a7',
- 'chi;': '\u03c7',
- 'cir;': '\u25cb',
- 'circ;': '\u02c6',
- 'circeq;': '\u2257',
- 'circlearrowleft;': '\u21ba',
- 'circlearrowright;': '\u21bb',
- 'circledast;': '\u229b',
- 'circledcirc;': '\u229a',
- 'circleddash;': '\u229d',
- 'CircleDot;': '\u2299',
- 'circledR;': '\xae',
- 'circledS;': '\u24c8',
- 'CircleMinus;': '\u2296',
- 'CirclePlus;': '\u2295',
- 'CircleTimes;': '\u2297',
- 'cirE;': '\u29c3',
- 'cire;': '\u2257',
- 'cirfnint;': '\u2a10',
- 'cirmid;': '\u2aef',
- 'cirscir;': '\u29c2',
- 'ClockwiseContourIntegral;': '\u2232',
- 'CloseCurlyDoubleQuote;': '\u201d',
- 'CloseCurlyQuote;': '\u2019',
- 'clubs;': '\u2663',
- 'clubsuit;': '\u2663',
- 'Colon;': '\u2237',
- 'colon;': ':',
- 'Colone;': '\u2a74',
- 'colone;': '\u2254',
- 'coloneq;': '\u2254',
- 'comma;': ',',
- 'commat;': '@',
- 'comp;': '\u2201',
- 'compfn;': '\u2218',
- 'complement;': '\u2201',
- 'complexes;': '\u2102',
- 'cong;': '\u2245',
- 'congdot;': '\u2a6d',
- 'Congruent;': '\u2261',
- 'Conint;': '\u222f',
- 'conint;': '\u222e',
- 'ContourIntegral;': '\u222e',
- 'Copf;': '\u2102',
- 'copf;': '\U0001d554',
- 'coprod;': '\u2210',
- 'Coproduct;': '\u2210',
- 'COPY': '\xa9',
- 'copy': '\xa9',
- 'COPY;': '\xa9',
- 'copy;': '\xa9',
- 'copysr;': '\u2117',
- 'CounterClockwiseContourIntegral;': '\u2233',
- 'crarr;': '\u21b5',
- 'Cross;': '\u2a2f',
- 'cross;': '\u2717',
- 'Cscr;': '\U0001d49e',
- 'cscr;': '\U0001d4b8',
- 'csub;': '\u2acf',
- 'csube;': '\u2ad1',
- 'csup;': '\u2ad0',
- 'csupe;': '\u2ad2',
- 'ctdot;': '\u22ef',
- 'cudarrl;': '\u2938',
- 'cudarrr;': '\u2935',
- 'cuepr;': '\u22de',
- 'cuesc;': '\u22df',
- 'cularr;': '\u21b6',
- 'cularrp;': '\u293d',
- 'Cup;': '\u22d3',
- 'cup;': '\u222a',
- 'cupbrcap;': '\u2a48',
- 'CupCap;': '\u224d',
- 'cupcap;': '\u2a46',
- 'cupcup;': '\u2a4a',
- 'cupdot;': '\u228d',
- 'cupor;': '\u2a45',
- 'cups;': '\u222a\ufe00',
- 'curarr;': '\u21b7',
- 'curarrm;': '\u293c',
- 'curlyeqprec;': '\u22de',
- 'curlyeqsucc;': '\u22df',
- 'curlyvee;': '\u22ce',
- 'curlywedge;': '\u22cf',
- 'curren': '\xa4',
- 'curren;': '\xa4',
- 'curvearrowleft;': '\u21b6',
- 'curvearrowright;': '\u21b7',
- 'cuvee;': '\u22ce',
- 'cuwed;': '\u22cf',
- 'cwconint;': '\u2232',
- 'cwint;': '\u2231',
- 'cylcty;': '\u232d',
- 'Dagger;': '\u2021',
- 'dagger;': '\u2020',
- 'daleth;': '\u2138',
- 'Darr;': '\u21a1',
- 'dArr;': '\u21d3',
- 'darr;': '\u2193',
- 'dash;': '\u2010',
- 'Dashv;': '\u2ae4',
- 'dashv;': '\u22a3',
- 'dbkarow;': '\u290f',
- 'dblac;': '\u02dd',
- 'Dcaron;': '\u010e',
- 'dcaron;': '\u010f',
- 'Dcy;': '\u0414',
- 'dcy;': '\u0434',
- 'DD;': '\u2145',
- 'dd;': '\u2146',
- 'ddagger;': '\u2021',
- 'ddarr;': '\u21ca',
- 'DDotrahd;': '\u2911',
- 'ddotseq;': '\u2a77',
- 'deg': '\xb0',
- 'deg;': '\xb0',
- 'Del;': '\u2207',
- 'Delta;': '\u0394',
- 'delta;': '\u03b4',
- 'demptyv;': '\u29b1',
- 'dfisht;': '\u297f',
- 'Dfr;': '\U0001d507',
- 'dfr;': '\U0001d521',
- 'dHar;': '\u2965',
- 'dharl;': '\u21c3',
- 'dharr;': '\u21c2',
- 'DiacriticalAcute;': '\xb4',
- 'DiacriticalDot;': '\u02d9',
- 'DiacriticalDoubleAcute;': '\u02dd',
- 'DiacriticalGrave;': '`',
- 'DiacriticalTilde;': '\u02dc',
- 'diam;': '\u22c4',
- 'Diamond;': '\u22c4',
- 'diamond;': '\u22c4',
- 'diamondsuit;': '\u2666',
- 'diams;': '\u2666',
- 'die;': '\xa8',
- 'DifferentialD;': '\u2146',
- 'digamma;': '\u03dd',
- 'disin;': '\u22f2',
- 'div;': '\xf7',
- 'divide': '\xf7',
- 'divide;': '\xf7',
- 'divideontimes;': '\u22c7',
- 'divonx;': '\u22c7',
- 'DJcy;': '\u0402',
- 'djcy;': '\u0452',
- 'dlcorn;': '\u231e',
- 'dlcrop;': '\u230d',
- 'dollar;': '$',
- 'Dopf;': '\U0001d53b',
- 'dopf;': '\U0001d555',
- 'Dot;': '\xa8',
- 'dot;': '\u02d9',
- 'DotDot;': '\u20dc',
- 'doteq;': '\u2250',
- 'doteqdot;': '\u2251',
- 'DotEqual;': '\u2250',
- 'dotminus;': '\u2238',
- 'dotplus;': '\u2214',
- 'dotsquare;': '\u22a1',
- 'doublebarwedge;': '\u2306',
- 'DoubleContourIntegral;': '\u222f',
- 'DoubleDot;': '\xa8',
- 'DoubleDownArrow;': '\u21d3',
- 'DoubleLeftArrow;': '\u21d0',
- 'DoubleLeftRightArrow;': '\u21d4',
- 'DoubleLeftTee;': '\u2ae4',
- 'DoubleLongLeftArrow;': '\u27f8',
- 'DoubleLongLeftRightArrow;': '\u27fa',
- 'DoubleLongRightArrow;': '\u27f9',
- 'DoubleRightArrow;': '\u21d2',
- 'DoubleRightTee;': '\u22a8',
- 'DoubleUpArrow;': '\u21d1',
- 'DoubleUpDownArrow;': '\u21d5',
- 'DoubleVerticalBar;': '\u2225',
- 'DownArrow;': '\u2193',
- 'Downarrow;': '\u21d3',
- 'downarrow;': '\u2193',
- 'DownArrowBar;': '\u2913',
- 'DownArrowUpArrow;': '\u21f5',
- 'DownBreve;': '\u0311',
- 'downdownarrows;': '\u21ca',
- 'downharpoonleft;': '\u21c3',
- 'downharpoonright;': '\u21c2',
- 'DownLeftRightVector;': '\u2950',
- 'DownLeftTeeVector;': '\u295e',
- 'DownLeftVector;': '\u21bd',
- 'DownLeftVectorBar;': '\u2956',
- 'DownRightTeeVector;': '\u295f',
- 'DownRightVector;': '\u21c1',
- 'DownRightVectorBar;': '\u2957',
- 'DownTee;': '\u22a4',
- 'DownTeeArrow;': '\u21a7',
- 'drbkarow;': '\u2910',
- 'drcorn;': '\u231f',
- 'drcrop;': '\u230c',
- 'Dscr;': '\U0001d49f',
- 'dscr;': '\U0001d4b9',
- 'DScy;': '\u0405',
- 'dscy;': '\u0455',
- 'dsol;': '\u29f6',
- 'Dstrok;': '\u0110',
- 'dstrok;': '\u0111',
- 'dtdot;': '\u22f1',
- 'dtri;': '\u25bf',
- 'dtrif;': '\u25be',
- 'duarr;': '\u21f5',
- 'duhar;': '\u296f',
- 'dwangle;': '\u29a6',
- 'DZcy;': '\u040f',
- 'dzcy;': '\u045f',
- 'dzigrarr;': '\u27ff',
- 'Eacute': '\xc9',
- 'eacute': '\xe9',
- 'Eacute;': '\xc9',
- 'eacute;': '\xe9',
- 'easter;': '\u2a6e',
- 'Ecaron;': '\u011a',
- 'ecaron;': '\u011b',
- 'ecir;': '\u2256',
- 'Ecirc': '\xca',
- 'ecirc': '\xea',
- 'Ecirc;': '\xca',
- 'ecirc;': '\xea',
- 'ecolon;': '\u2255',
- 'Ecy;': '\u042d',
- 'ecy;': '\u044d',
- 'eDDot;': '\u2a77',
- 'Edot;': '\u0116',
- 'eDot;': '\u2251',
- 'edot;': '\u0117',
- 'ee;': '\u2147',
- 'efDot;': '\u2252',
- 'Efr;': '\U0001d508',
- 'efr;': '\U0001d522',
- 'eg;': '\u2a9a',
- 'Egrave': '\xc8',
- 'egrave': '\xe8',
- 'Egrave;': '\xc8',
- 'egrave;': '\xe8',
- 'egs;': '\u2a96',
- 'egsdot;': '\u2a98',
- 'el;': '\u2a99',
- 'Element;': '\u2208',
- 'elinters;': '\u23e7',
- 'ell;': '\u2113',
- 'els;': '\u2a95',
- 'elsdot;': '\u2a97',
- 'Emacr;': '\u0112',
- 'emacr;': '\u0113',
- 'empty;': '\u2205',
- 'emptyset;': '\u2205',
- 'EmptySmallSquare;': '\u25fb',
- 'emptyv;': '\u2205',
- 'EmptyVerySmallSquare;': '\u25ab',
- 'emsp13;': '\u2004',
- 'emsp14;': '\u2005',
- 'emsp;': '\u2003',
- 'ENG;': '\u014a',
- 'eng;': '\u014b',
- 'ensp;': '\u2002',
- 'Eogon;': '\u0118',
- 'eogon;': '\u0119',
- 'Eopf;': '\U0001d53c',
- 'eopf;': '\U0001d556',
- 'epar;': '\u22d5',
- 'eparsl;': '\u29e3',
- 'eplus;': '\u2a71',
- 'epsi;': '\u03b5',
- 'Epsilon;': '\u0395',
- 'epsilon;': '\u03b5',
- 'epsiv;': '\u03f5',
- 'eqcirc;': '\u2256',
- 'eqcolon;': '\u2255',
- 'eqsim;': '\u2242',
- 'eqslantgtr;': '\u2a96',
- 'eqslantless;': '\u2a95',
- 'Equal;': '\u2a75',
- 'equals;': '=',
- 'EqualTilde;': '\u2242',
- 'equest;': '\u225f',
- 'Equilibrium;': '\u21cc',
- 'equiv;': '\u2261',
- 'equivDD;': '\u2a78',
- 'eqvparsl;': '\u29e5',
- 'erarr;': '\u2971',
- 'erDot;': '\u2253',
- 'Escr;': '\u2130',
- 'escr;': '\u212f',
- 'esdot;': '\u2250',
- 'Esim;': '\u2a73',
- 'esim;': '\u2242',
- 'Eta;': '\u0397',
- 'eta;': '\u03b7',
- 'ETH': '\xd0',
- 'eth': '\xf0',
- 'ETH;': '\xd0',
- 'eth;': '\xf0',
- 'Euml': '\xcb',
- 'euml': '\xeb',
- 'Euml;': '\xcb',
- 'euml;': '\xeb',
- 'euro;': '\u20ac',
- 'excl;': '!',
- 'exist;': '\u2203',
- 'Exists;': '\u2203',
- 'expectation;': '\u2130',
- 'ExponentialE;': '\u2147',
- 'exponentiale;': '\u2147',
- 'fallingdotseq;': '\u2252',
- 'Fcy;': '\u0424',
- 'fcy;': '\u0444',
- 'female;': '\u2640',
- 'ffilig;': '\ufb03',
- 'fflig;': '\ufb00',
- 'ffllig;': '\ufb04',
- 'Ffr;': '\U0001d509',
- 'ffr;': '\U0001d523',
- 'filig;': '\ufb01',
- 'FilledSmallSquare;': '\u25fc',
- 'FilledVerySmallSquare;': '\u25aa',
- 'fjlig;': 'fj',
- 'flat;': '\u266d',
- 'fllig;': '\ufb02',
- 'fltns;': '\u25b1',
- 'fnof;': '\u0192',
- 'Fopf;': '\U0001d53d',
- 'fopf;': '\U0001d557',
- 'ForAll;': '\u2200',
- 'forall;': '\u2200',
- 'fork;': '\u22d4',
- 'forkv;': '\u2ad9',
- 'Fouriertrf;': '\u2131',
- 'fpartint;': '\u2a0d',
- 'frac12': '\xbd',
- 'frac12;': '\xbd',
- 'frac13;': '\u2153',
- 'frac14': '\xbc',
- 'frac14;': '\xbc',
- 'frac15;': '\u2155',
- 'frac16;': '\u2159',
- 'frac18;': '\u215b',
- 'frac23;': '\u2154',
- 'frac25;': '\u2156',
- 'frac34': '\xbe',
- 'frac34;': '\xbe',
- 'frac35;': '\u2157',
- 'frac38;': '\u215c',
- 'frac45;': '\u2158',
- 'frac56;': '\u215a',
- 'frac58;': '\u215d',
- 'frac78;': '\u215e',
- 'frasl;': '\u2044',
- 'frown;': '\u2322',
- 'Fscr;': '\u2131',
- 'fscr;': '\U0001d4bb',
- 'gacute;': '\u01f5',
- 'Gamma;': '\u0393',
- 'gamma;': '\u03b3',
- 'Gammad;': '\u03dc',
- 'gammad;': '\u03dd',
- 'gap;': '\u2a86',
- 'Gbreve;': '\u011e',
- 'gbreve;': '\u011f',
- 'Gcedil;': '\u0122',
- 'Gcirc;': '\u011c',
- 'gcirc;': '\u011d',
- 'Gcy;': '\u0413',
- 'gcy;': '\u0433',
- 'Gdot;': '\u0120',
- 'gdot;': '\u0121',
- 'gE;': '\u2267',
- 'ge;': '\u2265',
- 'gEl;': '\u2a8c',
- 'gel;': '\u22db',
- 'geq;': '\u2265',
- 'geqq;': '\u2267',
- 'geqslant;': '\u2a7e',
- 'ges;': '\u2a7e',
- 'gescc;': '\u2aa9',
- 'gesdot;': '\u2a80',
- 'gesdoto;': '\u2a82',
- 'gesdotol;': '\u2a84',
- 'gesl;': '\u22db\ufe00',
- 'gesles;': '\u2a94',
- 'Gfr;': '\U0001d50a',
- 'gfr;': '\U0001d524',
- 'Gg;': '\u22d9',
- 'gg;': '\u226b',
- 'ggg;': '\u22d9',
- 'gimel;': '\u2137',
- 'GJcy;': '\u0403',
- 'gjcy;': '\u0453',
- 'gl;': '\u2277',
- 'gla;': '\u2aa5',
- 'glE;': '\u2a92',
- 'glj;': '\u2aa4',
- 'gnap;': '\u2a8a',
- 'gnapprox;': '\u2a8a',
- 'gnE;': '\u2269',
- 'gne;': '\u2a88',
- 'gneq;': '\u2a88',
- 'gneqq;': '\u2269',
- 'gnsim;': '\u22e7',
- 'Gopf;': '\U0001d53e',
- 'gopf;': '\U0001d558',
- 'grave;': '`',
- 'GreaterEqual;': '\u2265',
- 'GreaterEqualLess;': '\u22db',
- 'GreaterFullEqual;': '\u2267',
- 'GreaterGreater;': '\u2aa2',
- 'GreaterLess;': '\u2277',
- 'GreaterSlantEqual;': '\u2a7e',
- 'GreaterTilde;': '\u2273',
- 'Gscr;': '\U0001d4a2',
- 'gscr;': '\u210a',
- 'gsim;': '\u2273',
- 'gsime;': '\u2a8e',
- 'gsiml;': '\u2a90',
- 'GT': '>',
- 'gt': '>',
- 'GT;': '>',
- 'Gt;': '\u226b',
- 'gt;': '>',
- 'gtcc;': '\u2aa7',
- 'gtcir;': '\u2a7a',
- 'gtdot;': '\u22d7',
- 'gtlPar;': '\u2995',
- 'gtquest;': '\u2a7c',
- 'gtrapprox;': '\u2a86',
- 'gtrarr;': '\u2978',
- 'gtrdot;': '\u22d7',
- 'gtreqless;': '\u22db',
- 'gtreqqless;': '\u2a8c',
- 'gtrless;': '\u2277',
- 'gtrsim;': '\u2273',
- 'gvertneqq;': '\u2269\ufe00',
- 'gvnE;': '\u2269\ufe00',
- 'Hacek;': '\u02c7',
- 'hairsp;': '\u200a',
- 'half;': '\xbd',
- 'hamilt;': '\u210b',
- 'HARDcy;': '\u042a',
- 'hardcy;': '\u044a',
- 'hArr;': '\u21d4',
- 'harr;': '\u2194',
- 'harrcir;': '\u2948',
- 'harrw;': '\u21ad',
- 'Hat;': '^',
- 'hbar;': '\u210f',
- 'Hcirc;': '\u0124',
- 'hcirc;': '\u0125',
- 'hearts;': '\u2665',
- 'heartsuit;': '\u2665',
- 'hellip;': '\u2026',
- 'hercon;': '\u22b9',
- 'Hfr;': '\u210c',
- 'hfr;': '\U0001d525',
- 'HilbertSpace;': '\u210b',
- 'hksearow;': '\u2925',
- 'hkswarow;': '\u2926',
- 'hoarr;': '\u21ff',
- 'homtht;': '\u223b',
- 'hookleftarrow;': '\u21a9',
- 'hookrightarrow;': '\u21aa',
- 'Hopf;': '\u210d',
- 'hopf;': '\U0001d559',
- 'horbar;': '\u2015',
- 'HorizontalLine;': '\u2500',
- 'Hscr;': '\u210b',
- 'hscr;': '\U0001d4bd',
- 'hslash;': '\u210f',
- 'Hstrok;': '\u0126',
- 'hstrok;': '\u0127',
- 'HumpDownHump;': '\u224e',
- 'HumpEqual;': '\u224f',
- 'hybull;': '\u2043',
- 'hyphen;': '\u2010',
- 'Iacute': '\xcd',
- 'iacute': '\xed',
- 'Iacute;': '\xcd',
- 'iacute;': '\xed',
- 'ic;': '\u2063',
- 'Icirc': '\xce',
- 'icirc': '\xee',
- 'Icirc;': '\xce',
- 'icirc;': '\xee',
- 'Icy;': '\u0418',
- 'icy;': '\u0438',
- 'Idot;': '\u0130',
- 'IEcy;': '\u0415',
- 'iecy;': '\u0435',
- 'iexcl': '\xa1',
- 'iexcl;': '\xa1',
- 'iff;': '\u21d4',
- 'Ifr;': '\u2111',
- 'ifr;': '\U0001d526',
- 'Igrave': '\xcc',
- 'igrave': '\xec',
- 'Igrave;': '\xcc',
- 'igrave;': '\xec',
- 'ii;': '\u2148',
- 'iiiint;': '\u2a0c',
- 'iiint;': '\u222d',
- 'iinfin;': '\u29dc',
- 'iiota;': '\u2129',
- 'IJlig;': '\u0132',
- 'ijlig;': '\u0133',
- 'Im;': '\u2111',
- 'Imacr;': '\u012a',
- 'imacr;': '\u012b',
- 'image;': '\u2111',
- 'ImaginaryI;': '\u2148',
- 'imagline;': '\u2110',
- 'imagpart;': '\u2111',
- 'imath;': '\u0131',
- 'imof;': '\u22b7',
- 'imped;': '\u01b5',
- 'Implies;': '\u21d2',
- 'in;': '\u2208',
- 'incare;': '\u2105',
- 'infin;': '\u221e',
- 'infintie;': '\u29dd',
- 'inodot;': '\u0131',
- 'Int;': '\u222c',
- 'int;': '\u222b',
- 'intcal;': '\u22ba',
- 'integers;': '\u2124',
- 'Integral;': '\u222b',
- 'intercal;': '\u22ba',
- 'Intersection;': '\u22c2',
- 'intlarhk;': '\u2a17',
- 'intprod;': '\u2a3c',
- 'InvisibleComma;': '\u2063',
- 'InvisibleTimes;': '\u2062',
- 'IOcy;': '\u0401',
- 'iocy;': '\u0451',
- 'Iogon;': '\u012e',
- 'iogon;': '\u012f',
- 'Iopf;': '\U0001d540',
- 'iopf;': '\U0001d55a',
- 'Iota;': '\u0399',
- 'iota;': '\u03b9',
- 'iprod;': '\u2a3c',
- 'iquest': '\xbf',
- 'iquest;': '\xbf',
- 'Iscr;': '\u2110',
- 'iscr;': '\U0001d4be',
- 'isin;': '\u2208',
- 'isindot;': '\u22f5',
- 'isinE;': '\u22f9',
- 'isins;': '\u22f4',
- 'isinsv;': '\u22f3',
- 'isinv;': '\u2208',
- 'it;': '\u2062',
- 'Itilde;': '\u0128',
- 'itilde;': '\u0129',
- 'Iukcy;': '\u0406',
- 'iukcy;': '\u0456',
- 'Iuml': '\xcf',
- 'iuml': '\xef',
- 'Iuml;': '\xcf',
- 'iuml;': '\xef',
- 'Jcirc;': '\u0134',
- 'jcirc;': '\u0135',
- 'Jcy;': '\u0419',
- 'jcy;': '\u0439',
- 'Jfr;': '\U0001d50d',
- 'jfr;': '\U0001d527',
- 'jmath;': '\u0237',
- 'Jopf;': '\U0001d541',
- 'jopf;': '\U0001d55b',
- 'Jscr;': '\U0001d4a5',
- 'jscr;': '\U0001d4bf',
- 'Jsercy;': '\u0408',
- 'jsercy;': '\u0458',
- 'Jukcy;': '\u0404',
- 'jukcy;': '\u0454',
- 'Kappa;': '\u039a',
- 'kappa;': '\u03ba',
- 'kappav;': '\u03f0',
- 'Kcedil;': '\u0136',
- 'kcedil;': '\u0137',
- 'Kcy;': '\u041a',
- 'kcy;': '\u043a',
- 'Kfr;': '\U0001d50e',
- 'kfr;': '\U0001d528',
- 'kgreen;': '\u0138',
- 'KHcy;': '\u0425',
- 'khcy;': '\u0445',
- 'KJcy;': '\u040c',
- 'kjcy;': '\u045c',
- 'Kopf;': '\U0001d542',
- 'kopf;': '\U0001d55c',
- 'Kscr;': '\U0001d4a6',
- 'kscr;': '\U0001d4c0',
- 'lAarr;': '\u21da',
- 'Lacute;': '\u0139',
- 'lacute;': '\u013a',
- 'laemptyv;': '\u29b4',
- 'lagran;': '\u2112',
- 'Lambda;': '\u039b',
- 'lambda;': '\u03bb',
- 'Lang;': '\u27ea',
- 'lang;': '\u27e8',
- 'langd;': '\u2991',
- 'langle;': '\u27e8',
- 'lap;': '\u2a85',
- 'Laplacetrf;': '\u2112',
- 'laquo': '\xab',
- 'laquo;': '\xab',
- 'Larr;': '\u219e',
- 'lArr;': '\u21d0',
- 'larr;': '\u2190',
- 'larrb;': '\u21e4',
- 'larrbfs;': '\u291f',
- 'larrfs;': '\u291d',
- 'larrhk;': '\u21a9',
- 'larrlp;': '\u21ab',
- 'larrpl;': '\u2939',
- 'larrsim;': '\u2973',
- 'larrtl;': '\u21a2',
- 'lat;': '\u2aab',
- 'lAtail;': '\u291b',
- 'latail;': '\u2919',
- 'late;': '\u2aad',
- 'lates;': '\u2aad\ufe00',
- 'lBarr;': '\u290e',
- 'lbarr;': '\u290c',
- 'lbbrk;': '\u2772',
- 'lbrace;': '{',
- 'lbrack;': '[',
- 'lbrke;': '\u298b',
- 'lbrksld;': '\u298f',
- 'lbrkslu;': '\u298d',
- 'Lcaron;': '\u013d',
- 'lcaron;': '\u013e',
- 'Lcedil;': '\u013b',
- 'lcedil;': '\u013c',
- 'lceil;': '\u2308',
- 'lcub;': '{',
- 'Lcy;': '\u041b',
- 'lcy;': '\u043b',
- 'ldca;': '\u2936',
- 'ldquo;': '\u201c',
- 'ldquor;': '\u201e',
- 'ldrdhar;': '\u2967',
- 'ldrushar;': '\u294b',
- 'ldsh;': '\u21b2',
- 'lE;': '\u2266',
- 'le;': '\u2264',
- 'LeftAngleBracket;': '\u27e8',
- 'LeftArrow;': '\u2190',
- 'Leftarrow;': '\u21d0',
- 'leftarrow;': '\u2190',
- 'LeftArrowBar;': '\u21e4',
- 'LeftArrowRightArrow;': '\u21c6',
- 'leftarrowtail;': '\u21a2',
- 'LeftCeiling;': '\u2308',
- 'LeftDoubleBracket;': '\u27e6',
- 'LeftDownTeeVector;': '\u2961',
- 'LeftDownVector;': '\u21c3',
- 'LeftDownVectorBar;': '\u2959',
- 'LeftFloor;': '\u230a',
- 'leftharpoondown;': '\u21bd',
- 'leftharpoonup;': '\u21bc',
- 'leftleftarrows;': '\u21c7',
- 'LeftRightArrow;': '\u2194',
- 'Leftrightarrow;': '\u21d4',
- 'leftrightarrow;': '\u2194',
- 'leftrightarrows;': '\u21c6',
- 'leftrightharpoons;': '\u21cb',
- 'leftrightsquigarrow;': '\u21ad',
- 'LeftRightVector;': '\u294e',
- 'LeftTee;': '\u22a3',
- 'LeftTeeArrow;': '\u21a4',
- 'LeftTeeVector;': '\u295a',
- 'leftthreetimes;': '\u22cb',
- 'LeftTriangle;': '\u22b2',
- 'LeftTriangleBar;': '\u29cf',
- 'LeftTriangleEqual;': '\u22b4',
- 'LeftUpDownVector;': '\u2951',
- 'LeftUpTeeVector;': '\u2960',
- 'LeftUpVector;': '\u21bf',
- 'LeftUpVectorBar;': '\u2958',
- 'LeftVector;': '\u21bc',
- 'LeftVectorBar;': '\u2952',
- 'lEg;': '\u2a8b',
- 'leg;': '\u22da',
- 'leq;': '\u2264',
- 'leqq;': '\u2266',
- 'leqslant;': '\u2a7d',
- 'les;': '\u2a7d',
- 'lescc;': '\u2aa8',
- 'lesdot;': '\u2a7f',
- 'lesdoto;': '\u2a81',
- 'lesdotor;': '\u2a83',
- 'lesg;': '\u22da\ufe00',
- 'lesges;': '\u2a93',
- 'lessapprox;': '\u2a85',
- 'lessdot;': '\u22d6',
- 'lesseqgtr;': '\u22da',
- 'lesseqqgtr;': '\u2a8b',
- 'LessEqualGreater;': '\u22da',
- 'LessFullEqual;': '\u2266',
- 'LessGreater;': '\u2276',
- 'lessgtr;': '\u2276',
- 'LessLess;': '\u2aa1',
- 'lesssim;': '\u2272',
- 'LessSlantEqual;': '\u2a7d',
- 'LessTilde;': '\u2272',
- 'lfisht;': '\u297c',
- 'lfloor;': '\u230a',
- 'Lfr;': '\U0001d50f',
- 'lfr;': '\U0001d529',
- 'lg;': '\u2276',
- 'lgE;': '\u2a91',
- 'lHar;': '\u2962',
- 'lhard;': '\u21bd',
- 'lharu;': '\u21bc',
- 'lharul;': '\u296a',
- 'lhblk;': '\u2584',
- 'LJcy;': '\u0409',
- 'ljcy;': '\u0459',
- 'Ll;': '\u22d8',
- 'll;': '\u226a',
- 'llarr;': '\u21c7',
- 'llcorner;': '\u231e',
- 'Lleftarrow;': '\u21da',
- 'llhard;': '\u296b',
- 'lltri;': '\u25fa',
- 'Lmidot;': '\u013f',
- 'lmidot;': '\u0140',
- 'lmoust;': '\u23b0',
- 'lmoustache;': '\u23b0',
- 'lnap;': '\u2a89',
- 'lnapprox;': '\u2a89',
- 'lnE;': '\u2268',
- 'lne;': '\u2a87',
- 'lneq;': '\u2a87',
- 'lneqq;': '\u2268',
- 'lnsim;': '\u22e6',
- 'loang;': '\u27ec',
- 'loarr;': '\u21fd',
- 'lobrk;': '\u27e6',
- 'LongLeftArrow;': '\u27f5',
- 'Longleftarrow;': '\u27f8',
- 'longleftarrow;': '\u27f5',
- 'LongLeftRightArrow;': '\u27f7',
- 'Longleftrightarrow;': '\u27fa',
- 'longleftrightarrow;': '\u27f7',
- 'longmapsto;': '\u27fc',
- 'LongRightArrow;': '\u27f6',
- 'Longrightarrow;': '\u27f9',
- 'longrightarrow;': '\u27f6',
- 'looparrowleft;': '\u21ab',
- 'looparrowright;': '\u21ac',
- 'lopar;': '\u2985',
- 'Lopf;': '\U0001d543',
- 'lopf;': '\U0001d55d',
- 'loplus;': '\u2a2d',
- 'lotimes;': '\u2a34',
- 'lowast;': '\u2217',
- 'lowbar;': '_',
- 'LowerLeftArrow;': '\u2199',
- 'LowerRightArrow;': '\u2198',
- 'loz;': '\u25ca',
- 'lozenge;': '\u25ca',
- 'lozf;': '\u29eb',
- 'lpar;': '(',
- 'lparlt;': '\u2993',
- 'lrarr;': '\u21c6',
- 'lrcorner;': '\u231f',
- 'lrhar;': '\u21cb',
- 'lrhard;': '\u296d',
- 'lrm;': '\u200e',
- 'lrtri;': '\u22bf',
- 'lsaquo;': '\u2039',
- 'Lscr;': '\u2112',
- 'lscr;': '\U0001d4c1',
- 'Lsh;': '\u21b0',
- 'lsh;': '\u21b0',
- 'lsim;': '\u2272',
- 'lsime;': '\u2a8d',
- 'lsimg;': '\u2a8f',
- 'lsqb;': '[',
- 'lsquo;': '\u2018',
- 'lsquor;': '\u201a',
- 'Lstrok;': '\u0141',
- 'lstrok;': '\u0142',
- 'LT': '<',
- 'lt': '<',
- 'LT;': '<',
- 'Lt;': '\u226a',
- 'lt;': '<',
- 'ltcc;': '\u2aa6',
- 'ltcir;': '\u2a79',
- 'ltdot;': '\u22d6',
- 'lthree;': '\u22cb',
- 'ltimes;': '\u22c9',
- 'ltlarr;': '\u2976',
- 'ltquest;': '\u2a7b',
- 'ltri;': '\u25c3',
- 'ltrie;': '\u22b4',
- 'ltrif;': '\u25c2',
- 'ltrPar;': '\u2996',
- 'lurdshar;': '\u294a',
- 'luruhar;': '\u2966',
- 'lvertneqq;': '\u2268\ufe00',
- 'lvnE;': '\u2268\ufe00',
- 'macr': '\xaf',
- 'macr;': '\xaf',
- 'male;': '\u2642',
- 'malt;': '\u2720',
- 'maltese;': '\u2720',
- 'Map;': '\u2905',
- 'map;': '\u21a6',
- 'mapsto;': '\u21a6',
- 'mapstodown;': '\u21a7',
- 'mapstoleft;': '\u21a4',
- 'mapstoup;': '\u21a5',
- 'marker;': '\u25ae',
- 'mcomma;': '\u2a29',
- 'Mcy;': '\u041c',
- 'mcy;': '\u043c',
- 'mdash;': '\u2014',
- 'mDDot;': '\u223a',
- 'measuredangle;': '\u2221',
- 'MediumSpace;': '\u205f',
- 'Mellintrf;': '\u2133',
- 'Mfr;': '\U0001d510',
- 'mfr;': '\U0001d52a',
- 'mho;': '\u2127',
- 'micro': '\xb5',
- 'micro;': '\xb5',
- 'mid;': '\u2223',
- 'midast;': '*',
- 'midcir;': '\u2af0',
- 'middot': '\xb7',
- 'middot;': '\xb7',
- 'minus;': '\u2212',
- 'minusb;': '\u229f',
- 'minusd;': '\u2238',
- 'minusdu;': '\u2a2a',
- 'MinusPlus;': '\u2213',
- 'mlcp;': '\u2adb',
- 'mldr;': '\u2026',
- 'mnplus;': '\u2213',
- 'models;': '\u22a7',
- 'Mopf;': '\U0001d544',
- 'mopf;': '\U0001d55e',
- 'mp;': '\u2213',
- 'Mscr;': '\u2133',
- 'mscr;': '\U0001d4c2',
- 'mstpos;': '\u223e',
- 'Mu;': '\u039c',
- 'mu;': '\u03bc',
- 'multimap;': '\u22b8',
- 'mumap;': '\u22b8',
- 'nabla;': '\u2207',
- 'Nacute;': '\u0143',
- 'nacute;': '\u0144',
- 'nang;': '\u2220\u20d2',
- 'nap;': '\u2249',
- 'napE;': '\u2a70\u0338',
- 'napid;': '\u224b\u0338',
- 'napos;': '\u0149',
- 'napprox;': '\u2249',
- 'natur;': '\u266e',
- 'natural;': '\u266e',
- 'naturals;': '\u2115',
- 'nbsp': '\xa0',
- 'nbsp;': '\xa0',
- 'nbump;': '\u224e\u0338',
- 'nbumpe;': '\u224f\u0338',
- 'ncap;': '\u2a43',
- 'Ncaron;': '\u0147',
- 'ncaron;': '\u0148',
- 'Ncedil;': '\u0145',
- 'ncedil;': '\u0146',
- 'ncong;': '\u2247',
- 'ncongdot;': '\u2a6d\u0338',
- 'ncup;': '\u2a42',
- 'Ncy;': '\u041d',
- 'ncy;': '\u043d',
- 'ndash;': '\u2013',
- 'ne;': '\u2260',
- 'nearhk;': '\u2924',
- 'neArr;': '\u21d7',
- 'nearr;': '\u2197',
- 'nearrow;': '\u2197',
- 'nedot;': '\u2250\u0338',
- 'NegativeMediumSpace;': '\u200b',
- 'NegativeThickSpace;': '\u200b',
- 'NegativeThinSpace;': '\u200b',
- 'NegativeVeryThinSpace;': '\u200b',
- 'nequiv;': '\u2262',
- 'nesear;': '\u2928',
- 'nesim;': '\u2242\u0338',
- 'NestedGreaterGreater;': '\u226b',
- 'NestedLessLess;': '\u226a',
- 'NewLine;': '\n',
- 'nexist;': '\u2204',
- 'nexists;': '\u2204',
- 'Nfr;': '\U0001d511',
- 'nfr;': '\U0001d52b',
- 'ngE;': '\u2267\u0338',
- 'nge;': '\u2271',
- 'ngeq;': '\u2271',
- 'ngeqq;': '\u2267\u0338',
- 'ngeqslant;': '\u2a7e\u0338',
- 'nges;': '\u2a7e\u0338',
- 'nGg;': '\u22d9\u0338',
- 'ngsim;': '\u2275',
- 'nGt;': '\u226b\u20d2',
- 'ngt;': '\u226f',
- 'ngtr;': '\u226f',
- 'nGtv;': '\u226b\u0338',
- 'nhArr;': '\u21ce',
- 'nharr;': '\u21ae',
- 'nhpar;': '\u2af2',
- 'ni;': '\u220b',
- 'nis;': '\u22fc',
- 'nisd;': '\u22fa',
- 'niv;': '\u220b',
- 'NJcy;': '\u040a',
- 'njcy;': '\u045a',
- 'nlArr;': '\u21cd',
- 'nlarr;': '\u219a',
- 'nldr;': '\u2025',
- 'nlE;': '\u2266\u0338',
- 'nle;': '\u2270',
- 'nLeftarrow;': '\u21cd',
- 'nleftarrow;': '\u219a',
- 'nLeftrightarrow;': '\u21ce',
- 'nleftrightarrow;': '\u21ae',
- 'nleq;': '\u2270',
- 'nleqq;': '\u2266\u0338',
- 'nleqslant;': '\u2a7d\u0338',
- 'nles;': '\u2a7d\u0338',
- 'nless;': '\u226e',
- 'nLl;': '\u22d8\u0338',
- 'nlsim;': '\u2274',
- 'nLt;': '\u226a\u20d2',
- 'nlt;': '\u226e',
- 'nltri;': '\u22ea',
- 'nltrie;': '\u22ec',
- 'nLtv;': '\u226a\u0338',
- 'nmid;': '\u2224',
- 'NoBreak;': '\u2060',
- 'NonBreakingSpace;': '\xa0',
- 'Nopf;': '\u2115',
- 'nopf;': '\U0001d55f',
- 'not': '\xac',
- 'Not;': '\u2aec',
- 'not;': '\xac',
- 'NotCongruent;': '\u2262',
- 'NotCupCap;': '\u226d',
- 'NotDoubleVerticalBar;': '\u2226',
- 'NotElement;': '\u2209',
- 'NotEqual;': '\u2260',
- 'NotEqualTilde;': '\u2242\u0338',
- 'NotExists;': '\u2204',
- 'NotGreater;': '\u226f',
- 'NotGreaterEqual;': '\u2271',
- 'NotGreaterFullEqual;': '\u2267\u0338',
- 'NotGreaterGreater;': '\u226b\u0338',
- 'NotGreaterLess;': '\u2279',
- 'NotGreaterSlantEqual;': '\u2a7e\u0338',
- 'NotGreaterTilde;': '\u2275',
- 'NotHumpDownHump;': '\u224e\u0338',
- 'NotHumpEqual;': '\u224f\u0338',
- 'notin;': '\u2209',
- 'notindot;': '\u22f5\u0338',
- 'notinE;': '\u22f9\u0338',
- 'notinva;': '\u2209',
- 'notinvb;': '\u22f7',
- 'notinvc;': '\u22f6',
- 'NotLeftTriangle;': '\u22ea',
- 'NotLeftTriangleBar;': '\u29cf\u0338',
- 'NotLeftTriangleEqual;': '\u22ec',
- 'NotLess;': '\u226e',
- 'NotLessEqual;': '\u2270',
- 'NotLessGreater;': '\u2278',
- 'NotLessLess;': '\u226a\u0338',
- 'NotLessSlantEqual;': '\u2a7d\u0338',
- 'NotLessTilde;': '\u2274',
- 'NotNestedGreaterGreater;': '\u2aa2\u0338',
- 'NotNestedLessLess;': '\u2aa1\u0338',
- 'notni;': '\u220c',
- 'notniva;': '\u220c',
- 'notnivb;': '\u22fe',
- 'notnivc;': '\u22fd',
- 'NotPrecedes;': '\u2280',
- 'NotPrecedesEqual;': '\u2aaf\u0338',
- 'NotPrecedesSlantEqual;': '\u22e0',
- 'NotReverseElement;': '\u220c',
- 'NotRightTriangle;': '\u22eb',
- 'NotRightTriangleBar;': '\u29d0\u0338',
- 'NotRightTriangleEqual;': '\u22ed',
- 'NotSquareSubset;': '\u228f\u0338',
- 'NotSquareSubsetEqual;': '\u22e2',
- 'NotSquareSuperset;': '\u2290\u0338',
- 'NotSquareSupersetEqual;': '\u22e3',
- 'NotSubset;': '\u2282\u20d2',
- 'NotSubsetEqual;': '\u2288',
- 'NotSucceeds;': '\u2281',
- 'NotSucceedsEqual;': '\u2ab0\u0338',
- 'NotSucceedsSlantEqual;': '\u22e1',
- 'NotSucceedsTilde;': '\u227f\u0338',
- 'NotSuperset;': '\u2283\u20d2',
- 'NotSupersetEqual;': '\u2289',
- 'NotTilde;': '\u2241',
- 'NotTildeEqual;': '\u2244',
- 'NotTildeFullEqual;': '\u2247',
- 'NotTildeTilde;': '\u2249',
- 'NotVerticalBar;': '\u2224',
- 'npar;': '\u2226',
- 'nparallel;': '\u2226',
- 'nparsl;': '\u2afd\u20e5',
- 'npart;': '\u2202\u0338',
- 'npolint;': '\u2a14',
- 'npr;': '\u2280',
- 'nprcue;': '\u22e0',
- 'npre;': '\u2aaf\u0338',
- 'nprec;': '\u2280',
- 'npreceq;': '\u2aaf\u0338',
- 'nrArr;': '\u21cf',
- 'nrarr;': '\u219b',
- 'nrarrc;': '\u2933\u0338',
- 'nrarrw;': '\u219d\u0338',
- 'nRightarrow;': '\u21cf',
- 'nrightarrow;': '\u219b',
- 'nrtri;': '\u22eb',
- 'nrtrie;': '\u22ed',
- 'nsc;': '\u2281',
- 'nsccue;': '\u22e1',
- 'nsce;': '\u2ab0\u0338',
- 'Nscr;': '\U0001d4a9',
- 'nscr;': '\U0001d4c3',
- 'nshortmid;': '\u2224',
- 'nshortparallel;': '\u2226',
- 'nsim;': '\u2241',
- 'nsime;': '\u2244',
- 'nsimeq;': '\u2244',
- 'nsmid;': '\u2224',
- 'nspar;': '\u2226',
- 'nsqsube;': '\u22e2',
- 'nsqsupe;': '\u22e3',
- 'nsub;': '\u2284',
- 'nsubE;': '\u2ac5\u0338',
- 'nsube;': '\u2288',
- 'nsubset;': '\u2282\u20d2',
- 'nsubseteq;': '\u2288',
- 'nsubseteqq;': '\u2ac5\u0338',
- 'nsucc;': '\u2281',
- 'nsucceq;': '\u2ab0\u0338',
- 'nsup;': '\u2285',
- 'nsupE;': '\u2ac6\u0338',
- 'nsupe;': '\u2289',
- 'nsupset;': '\u2283\u20d2',
- 'nsupseteq;': '\u2289',
- 'nsupseteqq;': '\u2ac6\u0338',
- 'ntgl;': '\u2279',
- 'Ntilde': '\xd1',
- 'ntilde': '\xf1',
- 'Ntilde;': '\xd1',
- 'ntilde;': '\xf1',
- 'ntlg;': '\u2278',
- 'ntriangleleft;': '\u22ea',
- 'ntrianglelefteq;': '\u22ec',
- 'ntriangleright;': '\u22eb',
- 'ntrianglerighteq;': '\u22ed',
- 'Nu;': '\u039d',
- 'nu;': '\u03bd',
- 'num;': '#',
- 'numero;': '\u2116',
- 'numsp;': '\u2007',
- 'nvap;': '\u224d\u20d2',
- 'nVDash;': '\u22af',
- 'nVdash;': '\u22ae',
- 'nvDash;': '\u22ad',
- 'nvdash;': '\u22ac',
- 'nvge;': '\u2265\u20d2',
- 'nvgt;': '>\u20d2',
- 'nvHarr;': '\u2904',
- 'nvinfin;': '\u29de',
- 'nvlArr;': '\u2902',
- 'nvle;': '\u2264\u20d2',
- 'nvlt;': '<\u20d2',
- 'nvltrie;': '\u22b4\u20d2',
- 'nvrArr;': '\u2903',
- 'nvrtrie;': '\u22b5\u20d2',
- 'nvsim;': '\u223c\u20d2',
- 'nwarhk;': '\u2923',
- 'nwArr;': '\u21d6',
- 'nwarr;': '\u2196',
- 'nwarrow;': '\u2196',
- 'nwnear;': '\u2927',
- 'Oacute': '\xd3',
- 'oacute': '\xf3',
- 'Oacute;': '\xd3',
- 'oacute;': '\xf3',
- 'oast;': '\u229b',
- 'ocir;': '\u229a',
- 'Ocirc': '\xd4',
- 'ocirc': '\xf4',
- 'Ocirc;': '\xd4',
- 'ocirc;': '\xf4',
- 'Ocy;': '\u041e',
- 'ocy;': '\u043e',
- 'odash;': '\u229d',
- 'Odblac;': '\u0150',
- 'odblac;': '\u0151',
- 'odiv;': '\u2a38',
- 'odot;': '\u2299',
- 'odsold;': '\u29bc',
- 'OElig;': '\u0152',
- 'oelig;': '\u0153',
- 'ofcir;': '\u29bf',
- 'Ofr;': '\U0001d512',
- 'ofr;': '\U0001d52c',
- 'ogon;': '\u02db',
- 'Ograve': '\xd2',
- 'ograve': '\xf2',
- 'Ograve;': '\xd2',
- 'ograve;': '\xf2',
- 'ogt;': '\u29c1',
- 'ohbar;': '\u29b5',
- 'ohm;': '\u03a9',
- 'oint;': '\u222e',
- 'olarr;': '\u21ba',
- 'olcir;': '\u29be',
- 'olcross;': '\u29bb',
- 'oline;': '\u203e',
- 'olt;': '\u29c0',
- 'Omacr;': '\u014c',
- 'omacr;': '\u014d',
- 'Omega;': '\u03a9',
- 'omega;': '\u03c9',
- 'Omicron;': '\u039f',
- 'omicron;': '\u03bf',
- 'omid;': '\u29b6',
- 'ominus;': '\u2296',
- 'Oopf;': '\U0001d546',
- 'oopf;': '\U0001d560',
- 'opar;': '\u29b7',
- 'OpenCurlyDoubleQuote;': '\u201c',
- 'OpenCurlyQuote;': '\u2018',
- 'operp;': '\u29b9',
- 'oplus;': '\u2295',
- 'Or;': '\u2a54',
- 'or;': '\u2228',
- 'orarr;': '\u21bb',
- 'ord;': '\u2a5d',
- 'order;': '\u2134',
- 'orderof;': '\u2134',
- 'ordf': '\xaa',
- 'ordf;': '\xaa',
- 'ordm': '\xba',
- 'ordm;': '\xba',
- 'origof;': '\u22b6',
- 'oror;': '\u2a56',
- 'orslope;': '\u2a57',
- 'orv;': '\u2a5b',
- 'oS;': '\u24c8',
- 'Oscr;': '\U0001d4aa',
- 'oscr;': '\u2134',
- 'Oslash': '\xd8',
- 'oslash': '\xf8',
- 'Oslash;': '\xd8',
- 'oslash;': '\xf8',
- 'osol;': '\u2298',
- 'Otilde': '\xd5',
- 'otilde': '\xf5',
- 'Otilde;': '\xd5',
- 'otilde;': '\xf5',
- 'Otimes;': '\u2a37',
- 'otimes;': '\u2297',
- 'otimesas;': '\u2a36',
- 'Ouml': '\xd6',
- 'ouml': '\xf6',
- 'Ouml;': '\xd6',
- 'ouml;': '\xf6',
- 'ovbar;': '\u233d',
- 'OverBar;': '\u203e',
- 'OverBrace;': '\u23de',
- 'OverBracket;': '\u23b4',
- 'OverParenthesis;': '\u23dc',
- 'par;': '\u2225',
- 'para': '\xb6',
- 'para;': '\xb6',
- 'parallel;': '\u2225',
- 'parsim;': '\u2af3',
- 'parsl;': '\u2afd',
- 'part;': '\u2202',
- 'PartialD;': '\u2202',
- 'Pcy;': '\u041f',
- 'pcy;': '\u043f',
- 'percnt;': '%',
- 'period;': '.',
- 'permil;': '\u2030',
- 'perp;': '\u22a5',
- 'pertenk;': '\u2031',
- 'Pfr;': '\U0001d513',
- 'pfr;': '\U0001d52d',
- 'Phi;': '\u03a6',
- 'phi;': '\u03c6',
- 'phiv;': '\u03d5',
- 'phmmat;': '\u2133',
- 'phone;': '\u260e',
- 'Pi;': '\u03a0',
- 'pi;': '\u03c0',
- 'pitchfork;': '\u22d4',
- 'piv;': '\u03d6',
- 'planck;': '\u210f',
- 'planckh;': '\u210e',
- 'plankv;': '\u210f',
- 'plus;': '+',
- 'plusacir;': '\u2a23',
- 'plusb;': '\u229e',
- 'pluscir;': '\u2a22',
- 'plusdo;': '\u2214',
- 'plusdu;': '\u2a25',
- 'pluse;': '\u2a72',
- 'PlusMinus;': '\xb1',
- 'plusmn': '\xb1',
- 'plusmn;': '\xb1',
- 'plussim;': '\u2a26',
- 'plustwo;': '\u2a27',
- 'pm;': '\xb1',
- 'Poincareplane;': '\u210c',
- 'pointint;': '\u2a15',
- 'Popf;': '\u2119',
- 'popf;': '\U0001d561',
- 'pound': '\xa3',
- 'pound;': '\xa3',
- 'Pr;': '\u2abb',
- 'pr;': '\u227a',
- 'prap;': '\u2ab7',
- 'prcue;': '\u227c',
- 'prE;': '\u2ab3',
- 'pre;': '\u2aaf',
- 'prec;': '\u227a',
- 'precapprox;': '\u2ab7',
- 'preccurlyeq;': '\u227c',
- 'Precedes;': '\u227a',
- 'PrecedesEqual;': '\u2aaf',
- 'PrecedesSlantEqual;': '\u227c',
- 'PrecedesTilde;': '\u227e',
- 'preceq;': '\u2aaf',
- 'precnapprox;': '\u2ab9',
- 'precneqq;': '\u2ab5',
- 'precnsim;': '\u22e8',
- 'precsim;': '\u227e',
- 'Prime;': '\u2033',
- 'prime;': '\u2032',
- 'primes;': '\u2119',
- 'prnap;': '\u2ab9',
- 'prnE;': '\u2ab5',
- 'prnsim;': '\u22e8',
- 'prod;': '\u220f',
- 'Product;': '\u220f',
- 'profalar;': '\u232e',
- 'profline;': '\u2312',
- 'profsurf;': '\u2313',
- 'prop;': '\u221d',
- 'Proportion;': '\u2237',
- 'Proportional;': '\u221d',
- 'propto;': '\u221d',
- 'prsim;': '\u227e',
- 'prurel;': '\u22b0',
- 'Pscr;': '\U0001d4ab',
- 'pscr;': '\U0001d4c5',
- 'Psi;': '\u03a8',
- 'psi;': '\u03c8',
- 'puncsp;': '\u2008',
- 'Qfr;': '\U0001d514',
- 'qfr;': '\U0001d52e',
- 'qint;': '\u2a0c',
- 'Qopf;': '\u211a',
- 'qopf;': '\U0001d562',
- 'qprime;': '\u2057',
- 'Qscr;': '\U0001d4ac',
- 'qscr;': '\U0001d4c6',
- 'quaternions;': '\u210d',
- 'quatint;': '\u2a16',
- 'quest;': '?',
- 'questeq;': '\u225f',
- 'QUOT': '"',
- 'quot': '"',
- 'QUOT;': '"',
- 'quot;': '"',
- 'rAarr;': '\u21db',
- 'race;': '\u223d\u0331',
- 'Racute;': '\u0154',
- 'racute;': '\u0155',
- 'radic;': '\u221a',
- 'raemptyv;': '\u29b3',
- 'Rang;': '\u27eb',
- 'rang;': '\u27e9',
- 'rangd;': '\u2992',
- 'range;': '\u29a5',
- 'rangle;': '\u27e9',
- 'raquo': '\xbb',
- 'raquo;': '\xbb',
- 'Rarr;': '\u21a0',
- 'rArr;': '\u21d2',
- 'rarr;': '\u2192',
- 'rarrap;': '\u2975',
- 'rarrb;': '\u21e5',
- 'rarrbfs;': '\u2920',
- 'rarrc;': '\u2933',
- 'rarrfs;': '\u291e',
- 'rarrhk;': '\u21aa',
- 'rarrlp;': '\u21ac',
- 'rarrpl;': '\u2945',
- 'rarrsim;': '\u2974',
- 'Rarrtl;': '\u2916',
- 'rarrtl;': '\u21a3',
- 'rarrw;': '\u219d',
- 'rAtail;': '\u291c',
- 'ratail;': '\u291a',
- 'ratio;': '\u2236',
- 'rationals;': '\u211a',
- 'RBarr;': '\u2910',
- 'rBarr;': '\u290f',
- 'rbarr;': '\u290d',
- 'rbbrk;': '\u2773',
- 'rbrace;': '}',
- 'rbrack;': ']',
- 'rbrke;': '\u298c',
- 'rbrksld;': '\u298e',
- 'rbrkslu;': '\u2990',
- 'Rcaron;': '\u0158',
- 'rcaron;': '\u0159',
- 'Rcedil;': '\u0156',
- 'rcedil;': '\u0157',
- 'rceil;': '\u2309',
- 'rcub;': '}',
- 'Rcy;': '\u0420',
- 'rcy;': '\u0440',
- 'rdca;': '\u2937',
- 'rdldhar;': '\u2969',
- 'rdquo;': '\u201d',
- 'rdquor;': '\u201d',
- 'rdsh;': '\u21b3',
- 'Re;': '\u211c',
- 'real;': '\u211c',
- 'realine;': '\u211b',
- 'realpart;': '\u211c',
- 'reals;': '\u211d',
- 'rect;': '\u25ad',
- 'REG': '\xae',
- 'reg': '\xae',
- 'REG;': '\xae',
- 'reg;': '\xae',
- 'ReverseElement;': '\u220b',
- 'ReverseEquilibrium;': '\u21cb',
- 'ReverseUpEquilibrium;': '\u296f',
- 'rfisht;': '\u297d',
- 'rfloor;': '\u230b',
- 'Rfr;': '\u211c',
- 'rfr;': '\U0001d52f',
- 'rHar;': '\u2964',
- 'rhard;': '\u21c1',
- 'rharu;': '\u21c0',
- 'rharul;': '\u296c',
- 'Rho;': '\u03a1',
- 'rho;': '\u03c1',
- 'rhov;': '\u03f1',
- 'RightAngleBracket;': '\u27e9',
- 'RightArrow;': '\u2192',
- 'Rightarrow;': '\u21d2',
- 'rightarrow;': '\u2192',
- 'RightArrowBar;': '\u21e5',
- 'RightArrowLeftArrow;': '\u21c4',
- 'rightarrowtail;': '\u21a3',
- 'RightCeiling;': '\u2309',
- 'RightDoubleBracket;': '\u27e7',
- 'RightDownTeeVector;': '\u295d',
- 'RightDownVector;': '\u21c2',
- 'RightDownVectorBar;': '\u2955',
- 'RightFloor;': '\u230b',
- 'rightharpoondown;': '\u21c1',
- 'rightharpoonup;': '\u21c0',
- 'rightleftarrows;': '\u21c4',
- 'rightleftharpoons;': '\u21cc',
- 'rightrightarrows;': '\u21c9',
- 'rightsquigarrow;': '\u219d',
- 'RightTee;': '\u22a2',
- 'RightTeeArrow;': '\u21a6',
- 'RightTeeVector;': '\u295b',
- 'rightthreetimes;': '\u22cc',
- 'RightTriangle;': '\u22b3',
- 'RightTriangleBar;': '\u29d0',
- 'RightTriangleEqual;': '\u22b5',
- 'RightUpDownVector;': '\u294f',
- 'RightUpTeeVector;': '\u295c',
- 'RightUpVector;': '\u21be',
- 'RightUpVectorBar;': '\u2954',
- 'RightVector;': '\u21c0',
- 'RightVectorBar;': '\u2953',
- 'ring;': '\u02da',
- 'risingdotseq;': '\u2253',
- 'rlarr;': '\u21c4',
- 'rlhar;': '\u21cc',
- 'rlm;': '\u200f',
- 'rmoust;': '\u23b1',
- 'rmoustache;': '\u23b1',
- 'rnmid;': '\u2aee',
- 'roang;': '\u27ed',
- 'roarr;': '\u21fe',
- 'robrk;': '\u27e7',
- 'ropar;': '\u2986',
- 'Ropf;': '\u211d',
- 'ropf;': '\U0001d563',
- 'roplus;': '\u2a2e',
- 'rotimes;': '\u2a35',
- 'RoundImplies;': '\u2970',
- 'rpar;': ')',
- 'rpargt;': '\u2994',
- 'rppolint;': '\u2a12',
- 'rrarr;': '\u21c9',
- 'Rrightarrow;': '\u21db',
- 'rsaquo;': '\u203a',
- 'Rscr;': '\u211b',
- 'rscr;': '\U0001d4c7',
- 'Rsh;': '\u21b1',
- 'rsh;': '\u21b1',
- 'rsqb;': ']',
- 'rsquo;': '\u2019',
- 'rsquor;': '\u2019',
- 'rthree;': '\u22cc',
- 'rtimes;': '\u22ca',
- 'rtri;': '\u25b9',
- 'rtrie;': '\u22b5',
- 'rtrif;': '\u25b8',
- 'rtriltri;': '\u29ce',
- 'RuleDelayed;': '\u29f4',
- 'ruluhar;': '\u2968',
- 'rx;': '\u211e',
- 'Sacute;': '\u015a',
- 'sacute;': '\u015b',
- 'sbquo;': '\u201a',
- 'Sc;': '\u2abc',
- 'sc;': '\u227b',
- 'scap;': '\u2ab8',
- 'Scaron;': '\u0160',
- 'scaron;': '\u0161',
- 'sccue;': '\u227d',
- 'scE;': '\u2ab4',
- 'sce;': '\u2ab0',
- 'Scedil;': '\u015e',
- 'scedil;': '\u015f',
- 'Scirc;': '\u015c',
- 'scirc;': '\u015d',
- 'scnap;': '\u2aba',
- 'scnE;': '\u2ab6',
- 'scnsim;': '\u22e9',
- 'scpolint;': '\u2a13',
- 'scsim;': '\u227f',
- 'Scy;': '\u0421',
- 'scy;': '\u0441',
- 'sdot;': '\u22c5',
- 'sdotb;': '\u22a1',
- 'sdote;': '\u2a66',
- 'searhk;': '\u2925',
- 'seArr;': '\u21d8',
- 'searr;': '\u2198',
- 'searrow;': '\u2198',
- 'sect': '\xa7',
- 'sect;': '\xa7',
- 'semi;': ';',
- 'seswar;': '\u2929',
- 'setminus;': '\u2216',
- 'setmn;': '\u2216',
- 'sext;': '\u2736',
- 'Sfr;': '\U0001d516',
- 'sfr;': '\U0001d530',
- 'sfrown;': '\u2322',
- 'sharp;': '\u266f',
- 'SHCHcy;': '\u0429',
- 'shchcy;': '\u0449',
- 'SHcy;': '\u0428',
- 'shcy;': '\u0448',
- 'ShortDownArrow;': '\u2193',
- 'ShortLeftArrow;': '\u2190',
- 'shortmid;': '\u2223',
- 'shortparallel;': '\u2225',
- 'ShortRightArrow;': '\u2192',
- 'ShortUpArrow;': '\u2191',
- 'shy': '\xad',
- 'shy;': '\xad',
- 'Sigma;': '\u03a3',
- 'sigma;': '\u03c3',
- 'sigmaf;': '\u03c2',
- 'sigmav;': '\u03c2',
- 'sim;': '\u223c',
- 'simdot;': '\u2a6a',
- 'sime;': '\u2243',
- 'simeq;': '\u2243',
- 'simg;': '\u2a9e',
- 'simgE;': '\u2aa0',
- 'siml;': '\u2a9d',
- 'simlE;': '\u2a9f',
- 'simne;': '\u2246',
- 'simplus;': '\u2a24',
- 'simrarr;': '\u2972',
- 'slarr;': '\u2190',
- 'SmallCircle;': '\u2218',
- 'smallsetminus;': '\u2216',
- 'smashp;': '\u2a33',
- 'smeparsl;': '\u29e4',
- 'smid;': '\u2223',
- 'smile;': '\u2323',
- 'smt;': '\u2aaa',
- 'smte;': '\u2aac',
- 'smtes;': '\u2aac\ufe00',
- 'SOFTcy;': '\u042c',
- 'softcy;': '\u044c',
- 'sol;': '/',
- 'solb;': '\u29c4',
- 'solbar;': '\u233f',
- 'Sopf;': '\U0001d54a',
- 'sopf;': '\U0001d564',
- 'spades;': '\u2660',
- 'spadesuit;': '\u2660',
- 'spar;': '\u2225',
- 'sqcap;': '\u2293',
- 'sqcaps;': '\u2293\ufe00',
- 'sqcup;': '\u2294',
- 'sqcups;': '\u2294\ufe00',
- 'Sqrt;': '\u221a',
- 'sqsub;': '\u228f',
- 'sqsube;': '\u2291',
- 'sqsubset;': '\u228f',
- 'sqsubseteq;': '\u2291',
- 'sqsup;': '\u2290',
- 'sqsupe;': '\u2292',
- 'sqsupset;': '\u2290',
- 'sqsupseteq;': '\u2292',
- 'squ;': '\u25a1',
- 'Square;': '\u25a1',
- 'square;': '\u25a1',
- 'SquareIntersection;': '\u2293',
- 'SquareSubset;': '\u228f',
- 'SquareSubsetEqual;': '\u2291',
- 'SquareSuperset;': '\u2290',
- 'SquareSupersetEqual;': '\u2292',
- 'SquareUnion;': '\u2294',
- 'squarf;': '\u25aa',
- 'squf;': '\u25aa',
- 'srarr;': '\u2192',
- 'Sscr;': '\U0001d4ae',
- 'sscr;': '\U0001d4c8',
- 'ssetmn;': '\u2216',
- 'ssmile;': '\u2323',
- 'sstarf;': '\u22c6',
- 'Star;': '\u22c6',
- 'star;': '\u2606',
- 'starf;': '\u2605',
- 'straightepsilon;': '\u03f5',
- 'straightphi;': '\u03d5',
- 'strns;': '\xaf',
- 'Sub;': '\u22d0',
- 'sub;': '\u2282',
- 'subdot;': '\u2abd',
- 'subE;': '\u2ac5',
- 'sube;': '\u2286',
- 'subedot;': '\u2ac3',
- 'submult;': '\u2ac1',
- 'subnE;': '\u2acb',
- 'subne;': '\u228a',
- 'subplus;': '\u2abf',
- 'subrarr;': '\u2979',
- 'Subset;': '\u22d0',
- 'subset;': '\u2282',
- 'subseteq;': '\u2286',
- 'subseteqq;': '\u2ac5',
- 'SubsetEqual;': '\u2286',
- 'subsetneq;': '\u228a',
- 'subsetneqq;': '\u2acb',
- 'subsim;': '\u2ac7',
- 'subsub;': '\u2ad5',
- 'subsup;': '\u2ad3',
- 'succ;': '\u227b',
- 'succapprox;': '\u2ab8',
- 'succcurlyeq;': '\u227d',
- 'Succeeds;': '\u227b',
- 'SucceedsEqual;': '\u2ab0',
- 'SucceedsSlantEqual;': '\u227d',
- 'SucceedsTilde;': '\u227f',
- 'succeq;': '\u2ab0',
- 'succnapprox;': '\u2aba',
- 'succneqq;': '\u2ab6',
- 'succnsim;': '\u22e9',
- 'succsim;': '\u227f',
- 'SuchThat;': '\u220b',
- 'Sum;': '\u2211',
- 'sum;': '\u2211',
- 'sung;': '\u266a',
- 'sup1': '\xb9',
- 'sup1;': '\xb9',
- 'sup2': '\xb2',
- 'sup2;': '\xb2',
- 'sup3': '\xb3',
- 'sup3;': '\xb3',
- 'Sup;': '\u22d1',
- 'sup;': '\u2283',
- 'supdot;': '\u2abe',
- 'supdsub;': '\u2ad8',
- 'supE;': '\u2ac6',
- 'supe;': '\u2287',
- 'supedot;': '\u2ac4',
- 'Superset;': '\u2283',
- 'SupersetEqual;': '\u2287',
- 'suphsol;': '\u27c9',
- 'suphsub;': '\u2ad7',
- 'suplarr;': '\u297b',
- 'supmult;': '\u2ac2',
- 'supnE;': '\u2acc',
- 'supne;': '\u228b',
- 'supplus;': '\u2ac0',
- 'Supset;': '\u22d1',
- 'supset;': '\u2283',
- 'supseteq;': '\u2287',
- 'supseteqq;': '\u2ac6',
- 'supsetneq;': '\u228b',
- 'supsetneqq;': '\u2acc',
- 'supsim;': '\u2ac8',
- 'supsub;': '\u2ad4',
- 'supsup;': '\u2ad6',
- 'swarhk;': '\u2926',
- 'swArr;': '\u21d9',
- 'swarr;': '\u2199',
- 'swarrow;': '\u2199',
- 'swnwar;': '\u292a',
- 'szlig': '\xdf',
- 'szlig;': '\xdf',
- 'Tab;': '\t',
- 'target;': '\u2316',
- 'Tau;': '\u03a4',
- 'tau;': '\u03c4',
- 'tbrk;': '\u23b4',
- 'Tcaron;': '\u0164',
- 'tcaron;': '\u0165',
- 'Tcedil;': '\u0162',
- 'tcedil;': '\u0163',
- 'Tcy;': '\u0422',
- 'tcy;': '\u0442',
- 'tdot;': '\u20db',
- 'telrec;': '\u2315',
- 'Tfr;': '\U0001d517',
- 'tfr;': '\U0001d531',
- 'there4;': '\u2234',
- 'Therefore;': '\u2234',
- 'therefore;': '\u2234',
- 'Theta;': '\u0398',
- 'theta;': '\u03b8',
- 'thetasym;': '\u03d1',
- 'thetav;': '\u03d1',
- 'thickapprox;': '\u2248',
- 'thicksim;': '\u223c',
- 'ThickSpace;': '\u205f\u200a',
- 'thinsp;': '\u2009',
- 'ThinSpace;': '\u2009',
- 'thkap;': '\u2248',
- 'thksim;': '\u223c',
- 'THORN': '\xde',
- 'thorn': '\xfe',
- 'THORN;': '\xde',
- 'thorn;': '\xfe',
- 'Tilde;': '\u223c',
- 'tilde;': '\u02dc',
- 'TildeEqual;': '\u2243',
- 'TildeFullEqual;': '\u2245',
- 'TildeTilde;': '\u2248',
- 'times': '\xd7',
- 'times;': '\xd7',
- 'timesb;': '\u22a0',
- 'timesbar;': '\u2a31',
- 'timesd;': '\u2a30',
- 'tint;': '\u222d',
- 'toea;': '\u2928',
- 'top;': '\u22a4',
- 'topbot;': '\u2336',
- 'topcir;': '\u2af1',
- 'Topf;': '\U0001d54b',
- 'topf;': '\U0001d565',
- 'topfork;': '\u2ada',
- 'tosa;': '\u2929',
- 'tprime;': '\u2034',
- 'TRADE;': '\u2122',
- 'trade;': '\u2122',
- 'triangle;': '\u25b5',
- 'triangledown;': '\u25bf',
- 'triangleleft;': '\u25c3',
- 'trianglelefteq;': '\u22b4',
- 'triangleq;': '\u225c',
- 'triangleright;': '\u25b9',
- 'trianglerighteq;': '\u22b5',
- 'tridot;': '\u25ec',
- 'trie;': '\u225c',
- 'triminus;': '\u2a3a',
- 'TripleDot;': '\u20db',
- 'triplus;': '\u2a39',
- 'trisb;': '\u29cd',
- 'tritime;': '\u2a3b',
- 'trpezium;': '\u23e2',
- 'Tscr;': '\U0001d4af',
- 'tscr;': '\U0001d4c9',
- 'TScy;': '\u0426',
- 'tscy;': '\u0446',
- 'TSHcy;': '\u040b',
- 'tshcy;': '\u045b',
- 'Tstrok;': '\u0166',
- 'tstrok;': '\u0167',
- 'twixt;': '\u226c',
- 'twoheadleftarrow;': '\u219e',
- 'twoheadrightarrow;': '\u21a0',
- 'Uacute': '\xda',
- 'uacute': '\xfa',
- 'Uacute;': '\xda',
- 'uacute;': '\xfa',
- 'Uarr;': '\u219f',
- 'uArr;': '\u21d1',
- 'uarr;': '\u2191',
- 'Uarrocir;': '\u2949',
- 'Ubrcy;': '\u040e',
- 'ubrcy;': '\u045e',
- 'Ubreve;': '\u016c',
- 'ubreve;': '\u016d',
- 'Ucirc': '\xdb',
- 'ucirc': '\xfb',
- 'Ucirc;': '\xdb',
- 'ucirc;': '\xfb',
- 'Ucy;': '\u0423',
- 'ucy;': '\u0443',
- 'udarr;': '\u21c5',
- 'Udblac;': '\u0170',
- 'udblac;': '\u0171',
- 'udhar;': '\u296e',
- 'ufisht;': '\u297e',
- 'Ufr;': '\U0001d518',
- 'ufr;': '\U0001d532',
- 'Ugrave': '\xd9',
- 'ugrave': '\xf9',
- 'Ugrave;': '\xd9',
- 'ugrave;': '\xf9',
- 'uHar;': '\u2963',
- 'uharl;': '\u21bf',
- 'uharr;': '\u21be',
- 'uhblk;': '\u2580',
- 'ulcorn;': '\u231c',
- 'ulcorner;': '\u231c',
- 'ulcrop;': '\u230f',
- 'ultri;': '\u25f8',
- 'Umacr;': '\u016a',
- 'umacr;': '\u016b',
- 'uml': '\xa8',
- 'uml;': '\xa8',
- 'UnderBar;': '_',
- 'UnderBrace;': '\u23df',
- 'UnderBracket;': '\u23b5',
- 'UnderParenthesis;': '\u23dd',
- 'Union;': '\u22c3',
- 'UnionPlus;': '\u228e',
- 'Uogon;': '\u0172',
- 'uogon;': '\u0173',
- 'Uopf;': '\U0001d54c',
- 'uopf;': '\U0001d566',
- 'UpArrow;': '\u2191',
- 'Uparrow;': '\u21d1',
- 'uparrow;': '\u2191',
- 'UpArrowBar;': '\u2912',
- 'UpArrowDownArrow;': '\u21c5',
- 'UpDownArrow;': '\u2195',
- 'Updownarrow;': '\u21d5',
- 'updownarrow;': '\u2195',
- 'UpEquilibrium;': '\u296e',
- 'upharpoonleft;': '\u21bf',
- 'upharpoonright;': '\u21be',
- 'uplus;': '\u228e',
- 'UpperLeftArrow;': '\u2196',
- 'UpperRightArrow;': '\u2197',
- 'Upsi;': '\u03d2',
- 'upsi;': '\u03c5',
- 'upsih;': '\u03d2',
- 'Upsilon;': '\u03a5',
- 'upsilon;': '\u03c5',
- 'UpTee;': '\u22a5',
- 'UpTeeArrow;': '\u21a5',
- 'upuparrows;': '\u21c8',
- 'urcorn;': '\u231d',
- 'urcorner;': '\u231d',
- 'urcrop;': '\u230e',
- 'Uring;': '\u016e',
- 'uring;': '\u016f',
- 'urtri;': '\u25f9',
- 'Uscr;': '\U0001d4b0',
- 'uscr;': '\U0001d4ca',
- 'utdot;': '\u22f0',
- 'Utilde;': '\u0168',
- 'utilde;': '\u0169',
- 'utri;': '\u25b5',
- 'utrif;': '\u25b4',
- 'uuarr;': '\u21c8',
- 'Uuml': '\xdc',
- 'uuml': '\xfc',
- 'Uuml;': '\xdc',
- 'uuml;': '\xfc',
- 'uwangle;': '\u29a7',
- 'vangrt;': '\u299c',
- 'varepsilon;': '\u03f5',
- 'varkappa;': '\u03f0',
- 'varnothing;': '\u2205',
- 'varphi;': '\u03d5',
- 'varpi;': '\u03d6',
- 'varpropto;': '\u221d',
- 'vArr;': '\u21d5',
- 'varr;': '\u2195',
- 'varrho;': '\u03f1',
- 'varsigma;': '\u03c2',
- 'varsubsetneq;': '\u228a\ufe00',
- 'varsubsetneqq;': '\u2acb\ufe00',
- 'varsupsetneq;': '\u228b\ufe00',
- 'varsupsetneqq;': '\u2acc\ufe00',
- 'vartheta;': '\u03d1',
- 'vartriangleleft;': '\u22b2',
- 'vartriangleright;': '\u22b3',
- 'Vbar;': '\u2aeb',
- 'vBar;': '\u2ae8',
- 'vBarv;': '\u2ae9',
- 'Vcy;': '\u0412',
- 'vcy;': '\u0432',
- 'VDash;': '\u22ab',
- 'Vdash;': '\u22a9',
- 'vDash;': '\u22a8',
- 'vdash;': '\u22a2',
- 'Vdashl;': '\u2ae6',
- 'Vee;': '\u22c1',
- 'vee;': '\u2228',
- 'veebar;': '\u22bb',
- 'veeeq;': '\u225a',
- 'vellip;': '\u22ee',
- 'Verbar;': '\u2016',
- 'verbar;': '|',
- 'Vert;': '\u2016',
- 'vert;': '|',
- 'VerticalBar;': '\u2223',
- 'VerticalLine;': '|',
- 'VerticalSeparator;': '\u2758',
- 'VerticalTilde;': '\u2240',
- 'VeryThinSpace;': '\u200a',
- 'Vfr;': '\U0001d519',
- 'vfr;': '\U0001d533',
- 'vltri;': '\u22b2',
- 'vnsub;': '\u2282\u20d2',
- 'vnsup;': '\u2283\u20d2',
- 'Vopf;': '\U0001d54d',
- 'vopf;': '\U0001d567',
- 'vprop;': '\u221d',
- 'vrtri;': '\u22b3',
- 'Vscr;': '\U0001d4b1',
- 'vscr;': '\U0001d4cb',
- 'vsubnE;': '\u2acb\ufe00',
- 'vsubne;': '\u228a\ufe00',
- 'vsupnE;': '\u2acc\ufe00',
- 'vsupne;': '\u228b\ufe00',
- 'Vvdash;': '\u22aa',
- 'vzigzag;': '\u299a',
- 'Wcirc;': '\u0174',
- 'wcirc;': '\u0175',
- 'wedbar;': '\u2a5f',
- 'Wedge;': '\u22c0',
- 'wedge;': '\u2227',
- 'wedgeq;': '\u2259',
- 'weierp;': '\u2118',
- 'Wfr;': '\U0001d51a',
- 'wfr;': '\U0001d534',
- 'Wopf;': '\U0001d54e',
- 'wopf;': '\U0001d568',
- 'wp;': '\u2118',
- 'wr;': '\u2240',
- 'wreath;': '\u2240',
- 'Wscr;': '\U0001d4b2',
- 'wscr;': '\U0001d4cc',
- 'xcap;': '\u22c2',
- 'xcirc;': '\u25ef',
- 'xcup;': '\u22c3',
- 'xdtri;': '\u25bd',
- 'Xfr;': '\U0001d51b',
- 'xfr;': '\U0001d535',
- 'xhArr;': '\u27fa',
- 'xharr;': '\u27f7',
- 'Xi;': '\u039e',
- 'xi;': '\u03be',
- 'xlArr;': '\u27f8',
- 'xlarr;': '\u27f5',
- 'xmap;': '\u27fc',
- 'xnis;': '\u22fb',
- 'xodot;': '\u2a00',
- 'Xopf;': '\U0001d54f',
- 'xopf;': '\U0001d569',
- 'xoplus;': '\u2a01',
- 'xotime;': '\u2a02',
- 'xrArr;': '\u27f9',
- 'xrarr;': '\u27f6',
- 'Xscr;': '\U0001d4b3',
- 'xscr;': '\U0001d4cd',
- 'xsqcup;': '\u2a06',
- 'xuplus;': '\u2a04',
- 'xutri;': '\u25b3',
- 'xvee;': '\u22c1',
- 'xwedge;': '\u22c0',
- 'Yacute': '\xdd',
- 'yacute': '\xfd',
- 'Yacute;': '\xdd',
- 'yacute;': '\xfd',
- 'YAcy;': '\u042f',
- 'yacy;': '\u044f',
- 'Ycirc;': '\u0176',
- 'ycirc;': '\u0177',
- 'Ycy;': '\u042b',
- 'ycy;': '\u044b',
- 'yen': '\xa5',
- 'yen;': '\xa5',
- 'Yfr;': '\U0001d51c',
- 'yfr;': '\U0001d536',
- 'YIcy;': '\u0407',
- 'yicy;': '\u0457',
- 'Yopf;': '\U0001d550',
- 'yopf;': '\U0001d56a',
- 'Yscr;': '\U0001d4b4',
- 'yscr;': '\U0001d4ce',
- 'YUcy;': '\u042e',
- 'yucy;': '\u044e',
- 'yuml': '\xff',
- 'Yuml;': '\u0178',
- 'yuml;': '\xff',
- 'Zacute;': '\u0179',
- 'zacute;': '\u017a',
- 'Zcaron;': '\u017d',
- 'zcaron;': '\u017e',
- 'Zcy;': '\u0417',
- 'zcy;': '\u0437',
- 'Zdot;': '\u017b',
- 'zdot;': '\u017c',
- 'zeetrf;': '\u2128',
- 'ZeroWidthSpace;': '\u200b',
- 'Zeta;': '\u0396',
- 'zeta;': '\u03b6',
- 'Zfr;': '\u2128',
- 'zfr;': '\U0001d537',
- 'ZHcy;': '\u0416',
- 'zhcy;': '\u0436',
- 'zigrarr;': '\u21dd',
- 'Zopf;': '\u2124',
- 'zopf;': '\U0001d56b',
- 'Zscr;': '\U0001d4b5',
- 'zscr;': '\U0001d4cf',
- 'zwj;': '\u200d',
- 'zwnj;': '\u200c',
- }
-
-try:
- import http.client as compat_http_client
-except ImportError: # Python 2
- import httplib as compat_http_client
-
-try:
- from urllib.error import HTTPError as compat_HTTPError
-except ImportError: # Python 2
- from urllib2 import HTTPError as compat_HTTPError
-
-try:
- from urllib.request import urlretrieve as compat_urlretrieve
-except ImportError: # Python 2
- from urllib import urlretrieve as compat_urlretrieve
-
-try:
- from html.parser import HTMLParser as compat_HTMLParser
-except ImportError: # Python 2
- from HTMLParser import HTMLParser as compat_HTMLParser
-
-try: # Python 2
- from HTMLParser import HTMLParseError as compat_HTMLParseError
-except ImportError: # Python <3.4
- try:
- from html.parser import HTMLParseError as compat_HTMLParseError
- except ImportError: # Python >3.4
-
- # HTMLParseError has been deprecated in Python 3.3 and removed in
- # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
- # and uniform cross-version exceptiong handling
- class compat_HTMLParseError(Exception):
- pass
-
-try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
- compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
-
-try:
- import http.server as compat_http_server
-except ImportError:
- import BaseHTTPServer as compat_http_server
-
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
-try:
- from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
- from urllib.parse import unquote as compat_urllib_parse_unquote
- from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
-except ImportError: # Python 2
- _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
- else re.compile(r'([\x00-\x7f]+)'))
-
- # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
- # implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
-
- def compat_urllib_parse_unquote_to_bytes(string):
- """unquote_to_bytes('abc%20def') -> b'abc def'."""
- # Note: strings are encoded as UTF-8. This is only an issue if it contains
- # unescaped non-ASCII characters, which URIs should not.
- if not string:
- # Is it a string-like object?
- string.split
- return b''
- if isinstance(string, compat_str):
- string = string.encode('utf-8')
- bits = string.split(b'%')
- if len(bits) == 1:
- return string
- res = [bits[0]]
- append = res.append
- for item in bits[1:]:
- try:
- append(compat_urllib_parse._hextochr[item[:2]])
- append(item[2:])
- except KeyError:
- append(b'%')
- append(item)
- return b''.join(res)
-
- def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
- """Replace %xx escapes by their single-character equivalent. The optional
- encoding and errors parameters specify how to decode percent-encoded
- sequences into Unicode characters, as accepted by the bytes.decode()
- method.
- By default, percent-encoded sequences are decoded with UTF-8, and invalid
- sequences are replaced by a placeholder character.
-
- unquote('abc%20def') -> 'abc def'.
- """
- if '%' not in string:
- string.split
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'replace'
- bits = _asciire.split(string)
- res = [bits[0]]
- append = res.append
- for i in range(1, len(bits), 2):
- append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
- append(bits[i + 1])
- return ''.join(res)
-
- def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
- """Like unquote(), but also replace plus signs by spaces, as required for
- unquoting HTML form values.
-
- unquote_plus('%7e/abc+def') -> '~/abc def'
- """
- string = string.replace('+', ' ')
- return compat_urllib_parse_unquote(string, encoding, errors)
-
-try:
- from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError: # Python 2
- # Python 2 will choke in urlencode on mixture of byte and unicode strings.
- # Possible solutions are to either port it from python 3 with all
- # the friends or manually ensure input query contains only byte strings.
- # We will stick with latter thus recursively encoding the whole query.
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
- def encode_elem(e):
- if isinstance(e, dict):
- e = encode_dict(e)
- elif isinstance(e, (list, tuple,)):
- list_e = encode_list(e)
- e = tuple(list_e) if isinstance(e, tuple) else list_e
- elif isinstance(e, compat_str):
- e = e.encode(encoding)
- return e
-
- def encode_dict(d):
- return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
-
- def encode_list(l):
- return [encode_elem(e) for e in l]
-
- return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
-
-try:
- from urllib.request import DataHandler as compat_urllib_request_DataHandler
-except ImportError: # Python < 3.4
- # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
- class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
- def data_open(self, req):
- # data URLs as specified in RFC 2397.
- #
- # ignores POSTed data
- #
- # syntax:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- url = req.get_full_url()
-
- scheme, data = url.split(':', 1)
- mediatype, data = data.split(',', 1)
-
- # even base64 encoded data URLs might be quoted so unquote in any case:
- data = compat_urllib_parse_unquote_to_bytes(data)
- if mediatype.endswith(';base64'):
- data = binascii.a2b_base64(data)
- mediatype = mediatype[:-7]
-
- if not mediatype:
- mediatype = 'text/plain;charset=US-ASCII'
-
- headers = email.message_from_string(
- 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
-
- return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-
-try:
- compat_basestring = basestring # Python 2
-except NameError:
- compat_basestring = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
-try:
- from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError: # Python 2.6
- from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-
-etree = xml.etree.ElementTree
-
-
-class _TreeBuilder(etree.TreeBuilder):
- def doctype(self, name, pubid, system):
- pass
-
-
-if sys.version_info[0] >= 3:
- def compat_etree_fromstring(text):
- return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
-else:
- # python 2.x tries to encode unicode strings with ascii (see the
- # XMLParser._fixtext method)
- try:
- _etree_iter = etree.Element.iter
- except AttributeError: # Python <=2.6
- def _etree_iter(root):
- for el in root.findall('*'):
- yield el
- for sub in _etree_iter(el):
- yield sub
-
- # on 2.6 XML doesn't have a parser argument, function copied from CPython
- # 2.7 source
- def _XML(text, parser=None):
- if not parser:
- parser = etree.XMLParser(target=_TreeBuilder())
- parser.feed(text)
- return parser.close()
-
- def _element_factory(*args, **kwargs):
- el = etree.Element(*args, **kwargs)
- for k, v in el.items():
- if isinstance(v, bytes):
- el.set(k, v.decode('utf-8'))
- return el
-
- def compat_etree_fromstring(text):
- doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
- for el in _etree_iter(doc):
- if el.text is not None and isinstance(el.text, bytes):
- el.text = el.text.decode('utf-8')
- return doc
-
-if hasattr(etree, 'register_namespace'):
- compat_etree_register_namespace = etree.register_namespace
-else:
- def compat_etree_register_namespace(prefix, uri):
- """Register a namespace prefix.
- The registry is global, and any existing mapping for either the
- given prefix or the namespace URI will be removed.
- *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
- attributes in this namespace will be serialized with prefix if possible.
- ValueError is raised if prefix is reserved or is invalid.
- """
- if re.match(r"ns\d+$", prefix):
- raise ValueError("Prefix format reserved for internal use")
- for k, v in list(etree._namespace_map.items()):
- if k == uri or v == prefix:
- del etree._namespace_map[k]
- etree._namespace_map[uri] = prefix
-
-if sys.version_info < (2, 7):
- # Here comes the crazy part: In 2.6, if the xpath is a unicode,
- # .//node does not match if a node is a direct child of . !
- def compat_xpath(xpath):
- if isinstance(xpath, compat_str):
- xpath = xpath.encode('ascii')
- return xpath
-else:
- compat_xpath = lambda xpath: xpath
-
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, compat_str
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError('bad query field: %r' % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
-
-
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
-if compat_os_name == 'nt':
- def compat_shlex_quote(s):
- return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
-else:
- try:
- from shlex import quote as compat_shlex_quote
- except ImportError: # Python < 3.3
- def compat_shlex_quote(s):
- if re.match(r'^[-_\w./]+$', s):
- return s
- else:
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-try:
- args = shlex.split('中文')
- assert (isinstance(args, list) and
- isinstance(args[0], compat_str) and
- args[0] == '中文')
- compat_shlex_split = shlex.split
-except (AssertionError, UnicodeEncodeError):
- # Working around shlex issue with unicode strings on some python 2
- # versions (see http://bugs.python.org/issue1548891)
- def compat_shlex_split(s, comments=False, posix=True):
- if isinstance(s, compat_str):
- s = s.encode('utf-8')
- return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
-
-
-def compat_ord(c):
- if type(c) is int:
- return c
- else:
- return ord(c)
-
-
-if sys.version_info >= (3, 0):
- compat_getenv = os.getenv
- compat_expanduser = os.path.expanduser
-
- def compat_setenv(key, value, env=os.environ):
- env[key] = value
-else:
- # Environment variables should be decoded with filesystem encoding.
- # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
-
- def compat_getenv(key, default=None):
- from .utils import get_filesystem_encoding
- env = os.getenv(key, default)
- if env:
- env = env.decode(get_filesystem_encoding())
- return env
-
- def compat_setenv(key, value, env=os.environ):
- def encode(v):
- from .utils import get_filesystem_encoding
- return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
- env[encode(key)] = encode(value)
-
- # HACK: The default implementations of os.path.expanduser from cpython do not decode
- # environment variables with filesystem encoding. We will work around this by
- # providing adjusted implementations.
- # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
- # for different platforms with correct environment variables decoding.
-
- if compat_os_name == 'posix':
- def compat_expanduser(path):
- """Expand ~ and ~user constructions. If user or $HOME is unknown,
- do nothing."""
- if not path.startswith('~'):
- return path
- i = path.find('/', 1)
- if i < 0:
- i = len(path)
- if i == 1:
- if 'HOME' not in os.environ:
- import pwd
- userhome = pwd.getpwuid(os.getuid()).pw_dir
- else:
- userhome = compat_getenv('HOME')
- else:
- import pwd
- try:
- pwent = pwd.getpwnam(path[1:i])
- except KeyError:
- return path
- userhome = pwent.pw_dir
- userhome = userhome.rstrip('/')
- return (userhome + path[i:]) or '/'
- elif compat_os_name in ('nt', 'ce'):
- def compat_expanduser(path):
- """Expand ~ and ~user constructs.
-
- If user or $HOME is unknown, do nothing."""
- if path[:1] != '~':
- return path
- i, n = 1, len(path)
- while i < n and path[i] not in '/\\':
- i = i + 1
-
- if 'HOME' in os.environ:
- userhome = compat_getenv('HOME')
- elif 'USERPROFILE' in os.environ:
- userhome = compat_getenv('USERPROFILE')
- elif 'HOMEPATH' not in os.environ:
- return path
- else:
- try:
- drive = compat_getenv('HOMEDRIVE')
- except KeyError:
- drive = ''
- userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
-
- if i != 1: # ~user
- userhome = os.path.join(os.path.dirname(userhome), path[1:i])
-
- return userhome + path[i:]
- else:
- compat_expanduser = os.path.expanduser
-
-
-if sys.version_info < (3, 0):
- def compat_print(s):
- from .utils import preferredencoding
- print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
-else:
- def compat_print(s):
- assert isinstance(s, compat_str)
- print(s)
-
-
-if sys.version_info < (3, 0) and sys.platform == 'win32':
- def compat_getpass(prompt, *args, **kwargs):
- if isinstance(prompt, compat_str):
- from .utils import preferredencoding
- prompt = prompt.encode(preferredencoding())
- return getpass.getpass(prompt, *args, **kwargs)
-else:
- compat_getpass = getpass.getpass
-
-try:
- compat_input = raw_input
-except NameError: # Python 3
- compat_input = input
-
-# Python < 2.6.5 require kwargs to be bytes
-try:
- def _testfunc(x):
- pass
- _testfunc(**{'x': 0})
-except TypeError:
- def compat_kwargs(kwargs):
- return dict((bytes(k), v) for k, v in kwargs.items())
-else:
- compat_kwargs = lambda kwargs: kwargs
-
-
-try:
- compat_numeric_types = (int, float, long, complex)
-except NameError: # Python 3
- compat_numeric_types = (int, float, complex)
-
-
-try:
- compat_integer_types = (int, long)
-except NameError: # Python 3
- compat_integer_types = (int, )
-
-
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (a ``(host, port)`` pair) and return the socket.

        Every address returned by getaddrinfo is tried in order.  The first
        successful connection is returned; otherwise the last socket error
        is re-raised, or a socket.error if getaddrinfo returned nothing.
        """
        host, port = address
        last_error = None
        candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, kind, proto, _canonname, sockaddr in candidates:
            conn = None
            try:
                conn = socket.socket(family, kind, proto)
                conn.settimeout(timeout)
                if source_address:
                    conn.bind(source_address)
                conn.connect(sockaddr)
            except socket.error as exc:
                # Remember the failure and release the half-open socket
                last_error = exc
                if conn is not None:
                    conn.close()
            else:
                return conn
        if last_error is None:
            raise socket.error('getaddrinfo returns an empty list')
        raise last_error
-
-
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text arguments.

    A throwaway parser is probed with a ``'-t'`` option.  If that raises
    TypeError, ``add_option`` is replaced by a wrapper that ASCII-encodes
    every ``compat_str`` positional and keyword argument before delegating
    to the real implementation.  Otherwise nothing is changed.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        return

    original_add_option = optparse.OptionGroup.add_option

    def _ascii_bytes(value):
        if isinstance(value, compat_str):
            return value.encode('ascii', 'replace')
        return value

    def _compat_add_option(self, *args, **kwargs):
        byte_args = [_ascii_bytes(arg) for arg in args]
        byte_kwargs = dict(
            (key, _ascii_bytes(value)) for key, value in kwargs.items())
        return original_add_option(self, *byte_args, **byte_kwargs)

    optparse.OptionGroup.add_option = _compat_add_option
-
-
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_size_from_env(name):
        """Return environment variable *name* as an int, or None.

        None is returned when the variable is unset, empty or not a valid
        integer, so a bogus COLUMNS/LINES value no longer raises ValueError
        (shutil.get_terminal_size ignores such values as well).
        """
        value = compat_getenv(name)
        if not value:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        COLUMNS and LINES from the environment are used when they hold
        positive integers.  Any missing dimension is taken from the output
        of ``stty size``, and from *fallback* if that command fails.
        """
        columns = _terminal_size_from_env('COLUMNS')
        lines = _terminal_size_from_env('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                # stty prints "<rows> <columns>"
                _lines, _columns = map(int, out.split())
            except Exception:
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
-
try:
    # Probe for keyword support (notably the ``step`` argument)
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... forever."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
-
-if sys.version_info >= (3, 0):
- from tokenize import tokenize as compat_tokenize_tokenize
-else:
- from tokenize import generate_tokens as compat_tokenize_tokenize
-
-
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
    # See https://bugs.python.org/issue19099
    def _compat_struct_format(fmt):
        """Return *fmt* ASCII-encoded when it is a text string."""
        return fmt.encode('ascii') if isinstance(fmt, compat_str) else fmt

    def compat_struct_pack(spec, *args):
        return struct.pack(_compat_struct_format(spec), *args)

    def compat_struct_unpack(spec, *args):
        return struct.unpack(_compat_struct_format(spec), *args)

    class compat_Struct(struct.Struct):
        def __init__(self, fmt):
            super(compat_Struct, self).__init__(_compat_struct_format(fmt))
else:
    compat_struct_pack, compat_struct_unpack = struct.pack, struct.unpack
    if platform.python_implementation() != 'IronPython' or sys.version_info >= (2, 7, 8):
        compat_Struct = struct.Struct
    else:
        class compat_Struct(struct.Struct):
            def unpack(self, string):
                # On this IronPython the data is wrapped in a buffer first
                if not isinstance(string, buffer):  # noqa: F821
                    string = buffer(string)  # noqa: F821
                return super(compat_Struct, self).unpack(string)
-
-
-try:
- from future_builtins import zip as compat_zip
-except ImportError: # not 2.6+ or is 3.x
- try:
- from itertools import izip as compat_zip # < 2.5 or 3.x
- except ImportError:
- compat_zip = zip
-
-
if sys.version_info >= (3, 3):
    compat_b64decode = base64.b64decode
else:
    def compat_b64decode(s, *args, **kwargs):
        """base64.b64decode that also accepts ASCII text strings."""
        data = s.encode('ascii') if isinstance(s, compat_str) else s
        return base64.b64decode(data, *args, **kwargs)
-
-
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
# 2. https://github.com/rg3/youtube-dl/pull/4392
_PYPY_NEEDS_STR_FUNCNAME = (
    platform.python_implementation() == 'PyPy' and
    sys.pypy_version_info < (5, 4, 0))


def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Return ctypes.WINFUNCTYPE(*args, **kwargs), wrapped on old PyPy.

    On affected PyPy versions the returned callable converts the function
    name in its ``(funcname, dll)`` first argument with ``str()`` before
    calling the real prototype.
    """
    prototype = ctypes.WINFUNCTYPE(*args, **kwargs)
    if not _PYPY_NEEDS_STR_FUNCNAME:
        return prototype

    def resf(tpl, *args, **kwargs):
        funcname, dll = tpl
        return prototype((str(funcname), dll), *args, **kwargs)

    return resf
-
-
-__all__ = [
- 'compat_HTMLParseError',
- 'compat_HTMLParser',
- 'compat_HTTPError',
- 'compat_Struct',
- 'compat_b64decode',
- 'compat_basestring',
- 'compat_chr',
- 'compat_cookiejar',
- 'compat_cookies',
- 'compat_ctypes_WINFUNCTYPE',
- 'compat_etree_fromstring',
- 'compat_etree_register_namespace',
- 'compat_expanduser',
- 'compat_get_terminal_size',
- 'compat_getenv',
- 'compat_getpass',
- 'compat_html_entities',
- 'compat_html_entities_html5',
- 'compat_http_client',
- 'compat_http_server',
- 'compat_input',
- 'compat_integer_types',
- 'compat_itertools_count',
- 'compat_kwargs',
- 'compat_numeric_types',
- 'compat_ord',
- 'compat_os_name',
- 'compat_parse_qs',
- 'compat_print',
- 'compat_setenv',
- 'compat_shlex_quote',
- 'compat_shlex_split',
- 'compat_socket_create_connection',
- 'compat_str',
- 'compat_struct_pack',
- 'compat_struct_unpack',
- 'compat_subprocess_get_DEVNULL',
- 'compat_tokenize_tokenize',
- 'compat_urllib_error',
- 'compat_urllib_parse',
- 'compat_urllib_parse_unquote',
- 'compat_urllib_parse_unquote_plus',
- 'compat_urllib_parse_unquote_to_bytes',
- 'compat_urllib_parse_urlencode',
- 'compat_urllib_parse_urlparse',
- 'compat_urllib_request',
- 'compat_urllib_request_DataHandler',
- 'compat_urllib_response',
- 'compat_urlparse',
- 'compat_urlretrieve',
- 'compat_xml_parse_error',
- 'compat_xpath',
- 'compat_zip',
- 'workaround_optparse_bug9161',
-]
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
deleted file mode 100644
index 2e485df..0000000
--- a/youtube_dl/downloader/__init__.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import FileDownloader
-from .f4m import F4mFD
-from .hls import HlsFD
-from .http import HttpFD
-from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
-from .rtsp import RtspFD
-from .ism import IsmFD
-from .external import (
- get_external_downloader,
- FFmpegFD,
-)
-
-from ..utils import (
- determine_protocol,
-)
-
-PROTOCOL_MAP = {
- 'rtmp': RtmpFD,
- 'm3u8_native': HlsFD,
- 'm3u8': FFmpegFD,
- 'mms': RtspFD,
- 'rtsp': RtspFD,
- 'f4m': F4mFD,
- 'http_dash_segments': DashSegmentsFD,
- 'ism': IsmFD,
-}
-
-
def get_suitable_downloader(info_dict, params=None):
    """Get the downloader class that can handle the info dict.

    The protocol computed by determine_protocol() is stored back into
    ``info_dict['protocol']``.  Candidates are then tried in this order:

    1. the external downloader named by ``params['external_downloader']``,
       if it reports that it can download *info_dict*;
    2. FFmpegFD for live ``m3u8*`` streams;
    3. HlsFD / FFmpegFD when ``hls_prefer_native`` overrides the protocol;
    4. the PROTOCOL_MAP entry for the protocol, defaulting to HttpFD.

    *params* defaults to an empty dict; ``None`` is used as the default
    value so that no mutable dict is shared between calls.
    """
    if params is None:
        params = {}

    protocol = determine_protocol(info_dict)
    info_dict['protocol'] = protocol

    # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
    #     return FFmpegFD

    external_downloader = params.get('external_downloader')
    if external_downloader is not None:
        ed = get_external_downloader(external_downloader)
        if ed.can_download(info_dict):
            return ed

    if protocol.startswith('m3u8') and info_dict.get('is_live'):
        return FFmpegFD

    if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
        return HlsFD

    if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
        return FFmpegFD

    return PROTOCOL_MAP.get(protocol, HttpFD)
-
-
-__all__ = [
- 'get_suitable_downloader',
- 'FileDownloader',
-]
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
deleted file mode 100644
index 5979833..0000000
--- a/youtube_dl/downloader/common.py
+++ /dev/null
@@ -1,389 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import re
-import sys
-import time
-import random
-
-from ..compat import compat_os_name
-from ..utils import (
- decodeArgument,
- encodeFilename,
- error_to_compat_str,
- format_bytes,
- shell_quote,
- timeconvert,
-)
-
-
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    retries:            Number of times to retry for HTTP error 5xx
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    logtostderr:        Log messages to stderr instead of stdout.
    consoletitle:       Display progress in console window's titlebar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A list of additional command-line arguments for the
                        external downloader.
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params
        self.add_progress_hook(self.report_progress)

    @staticmethod
    def format_seconds(seconds):
        """Format a duration as MM:SS or HH:MM:SS ('--:--:--' above 99 hours)."""
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage, or None if the total is unknown.

        A total of 0 is treated like an unknown total instead of raising
        ZeroDivisionError.
        """
        if not data_len:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage into a 6 character wide field."""
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining whole seconds from the average rate so far.

        Returns None when the total is unknown, nothing has been transferred
        yet, or less than a millisecond has elapsed.
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Format an ETA in seconds; '--:--' when it is unknown."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes per second, or None if unknown."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed into a 10 character wide field."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def format_retries(retries):
        """Format a retry count, rendering infinity as 'inf'."""
        return 'inf' if retries == float('inf') else '%.0f' % retries

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the throughput of the last read.

        The result is the measured rate clamped to [bytes / 2, bytes * 2]
        and never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number with an optional, case-insensitive binary
        suffix (k, M, G, ...).  Returns None if the string does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix is found at index 0, i.e. a multiplier of 1
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward to ydl.to_screen."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Forward to ydl.to_stderr (previously misrouted to to_screen)."""
        self.ydl.to_stderr(message)

    def to_console_title(self, message):
        """Forward to ydl.to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to ydl.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to ydl.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to ydl.report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        if rate_limit is None or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # True division: flooring the target time under-sleeps by up
            # to one second per call
            time.sleep(max(float(byte_counter) / rate_limit - elapsed, 0))

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from the given filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return the name of the '.ytdl' companion file for filename."""
        return filename + '.ytdl'

    def try_rename(self, old_filename, new_filename):
        """Rename a file, reporting (not raising) IOError/OSError."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError) as err:
            self.report_error('unable to rename file: %s' % error_to_compat_str(err))

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the timestamp derived from the header, or None when the
        header is missing or unusable or the file does not exist.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            # Use the same encoded name as the isfile() check above
            os.utime(encodeFilename(filename), (time.time(), filetime))
        except Exception:
            # Best effort: a failure to set the timestamp is not fatal
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _report_progress_status(self, msg, is_last_line=False):
        """Print a progress line and mirror it to the console title.

        Unless 'progress_with_newline' is set, the line overwrites the
        previous one: on 'nt' by padding with spaces, elsewhere with an
        erase-line escape sequence when stderr is a terminal.
        """
        fullmsg = '[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if compat_os_name == 'nt':
                prev_len = getattr(self, '_report_progress_prev_line_length',
                                   0)
                if prev_len > len(fullmsg):
                    fullmsg += ' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = '\r'
            else:
                clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title('youtube-dl ' + msg)

    def report_progress(self, s):
        """Default progress hook: render the status dict *s* as a progress line.

        Formatted helper strings are stored back into *s* under keys
        starting with an underscore.
        """
        if s['status'] == 'finished':
            if self.params.get('noprogress', False):
                self.to_screen('[download] Download completed')
            else:
                msg_template = '100%%'
                if s.get('total_bytes') is not None:
                    s['_total_bytes_str'] = format_bytes(s['total_bytes'])
                    msg_template += ' of %(_total_bytes_str)s'
                if s.get('elapsed') is not None:
                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                    msg_template += ' in %(_elapsed_str)s'
                self._report_progress_status(
                    msg_template % s, is_last_line=True)

        if self.params.get('noprogress'):
            return

        if s['status'] != 'downloading':
            return

        if s.get('eta') is not None:
            s['_eta_str'] = self.format_eta(s['eta'])
        else:
            s['_eta_str'] = 'Unknown ETA'

        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
        else:
            if s.get('downloaded_bytes') == 0:
                s['_percent_str'] = self.format_percent(0)
            else:
                s['_percent_str'] = 'Unknown %'

        if s.get('speed') is not None:
            s['_speed_str'] = self.format_speed(s['speed'])
        else:
            s['_speed_str'] = 'Unknown speed'

        if s.get('total_bytes') is not None:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
        elif s.get('total_bytes_estimate') is not None:
            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
        else:
            if s.get('downloaded_bytes') is not None:
                s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
                if s.get('elapsed'):
                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
                else:
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
            else:
                # _percent_str already carries its own percent sign; the
                # former stray '% ' here was an invalid conversion
                msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'

        self._report_progress_status(msg_template % s)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    def download(self, filename, info_dict):
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise
        """

        nooverwrites_and_exists = (
            self.params.get('nooverwrites', False) and
            os.path.exists(encodeFilename(filename))
        )

        if not hasattr(filename, 'write'):
            continuedl_and_exists = (
                self.params.get('continuedl', True) and
                os.path.isfile(encodeFilename(filename)) and
                not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                })
                return True

        min_sleep_interval = self.params.get('sleep_interval')
        if min_sleep_interval:
            max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
            sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
            self.to_screen(
                '[download] Sleeping %s seconds...' % (
                    int(sleep_interval) if sleep_interval.is_integer()
                    else '%.2f' % sleep_interval))
            time.sleep(sleep_interval)

        return self.real_download(filename, info_dict)

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dict."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register a callable invoked with each progress status dict."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Print the given command line when the 'verbose' option is set."""
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.to_screen('[debug] %s command line: %s' % (
            exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
deleted file mode 100644
index eaa7adf..0000000
--- a/youtube_dl/downloader/dash.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import unicode_literals
-
-from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
- DownloadError,
- urljoin,
-)
-
-
class DashSegmentsFD(FragmentFD):
    """
    Download segments in a DASH manifest
    """

    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
        """Download every fragment listed in info_dict['fragments'].

        Only the first fragment is fetched when the 'test' option is set.
        Fragments failing with an HTTP error are retried up to
        'fragment_retries' times.  Returns True on success, False otherwise.
        """
        base_url = info_dict.get('fragment_base_url')
        if self.params.get('test', False):
            fragments = info_dict['fragments'][:1]
        else:
            fragments = info_dict['fragments']

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        self._prepare_and_start_frag_download(ctx)

        max_retries = self.params.get('fragment_retries', 0)
        skip_unavailable = self.params.get('skip_unavailable_fragments', True)

        for frag_index, fragment in enumerate(fragments, 1):
            if frag_index <= ctx['fragment_index']:
                continue
            # In DASH, the first segment contains necessary headers to
            # generate a valid MP4 file, so always abort for the first segment
            fatal = frag_index == 1 or not skip_unavailable
            attempts = 0
            while attempts <= max_retries:
                try:
                    fragment_url = fragment.get('url')
                    if not fragment_url:
                        assert base_url
                        fragment_url = urljoin(base_url, fragment['path'])
                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                    if not success:
                        return False
                    self._append_fragment(ctx, frag_content)
                    break
                except compat_urllib_error.HTTPError as err:
                    # YouTube may often return 404 HTTP error for a fragment
                    # causing the whole download to fail. However if the same
                    # fragment is immediately retried with the same request
                    # data this usually succeeds (1-2 attempts is usually
                    # enough) thus allowing to download the whole file
                    # successfully. To be future-proof we will retry all
                    # fragments that fail with any HTTP error.
                    attempts += 1
                    if attempts <= max_retries:
                        self.report_retry_fragment(err, frag_index, attempts, max_retries)
                except DownloadError:
                    # Don't retry fragment if error occurred during HTTP
                    # downloading itself since it has own retry settings
                    if fatal:
                        raise
                    self.report_skip_fragment(frag_index)
                    break

            if attempts > max_retries:
                if not fatal:
                    self.report_skip_fragment(frag_index)
                    continue
                self.report_error('giving up after %s fragment retries' % max_retries)
                return False

        self._finish_frag_download(ctx)

        return True
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
deleted file mode 100644
index 958d00a..0000000
--- a/youtube_dl/downloader/external.py
+++ /dev/null
@@ -1,354 +0,0 @@
-from __future__ import unicode_literals
-
-import os.path
-import re
-import subprocess
-import sys
-import time
-
-from .common import FileDownloader
-from ..compat import (
- compat_setenv,
- compat_str,
-)
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
-from ..utils import (
- cli_option,
- cli_valueless_option,
- cli_bool_option,
- cli_configuration_args,
- encodeFilename,
- encodeArgument,
- handle_youtubedl_headers,
- check_executable,
- is_outdated_version,
-)
-
-
class ExternalFD(FileDownloader):
    """Base class for downloaders that delegate to an external program.

    Subclasses either implement ``_make_cmd`` (returning the command line)
    or override ``_call_downloader``.  ``available`` reads the subclass
    attribute ``AVAILABLE_OPT`` unless it is overridden.
    """

    def real_download(self, filename, info_dict):
        """Run the external downloader and report the outcome.

        Returns True when the downloader's return value equals 0, False
        otherwise.  A KeyboardInterrupt during a live stream download is
        treated as a successful termination.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        try:
            started = time.time()
            retval = self._call_downloader(tmpfilename, info_dict)
        except KeyboardInterrupt:
            if not info_dict.get('is_live'):
                raise
            # Live stream downloading cancellation should be considered as
            # correct and expected termination thus all postprocessing
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())

        if retval == 0:
            status = {
                'filename': filename,
                'status': 'finished',
                'elapsed': time.time() - started,
            }
            if filename != '-':
                fsize = os.path.getsize(encodeFilename(tmpfilename))
                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
                self.try_rename(tmpfilename, filename)
                status.update({
                    'downloaded_bytes': fsize,
                    'total_bytes': fsize,
                })
            self._hook_progress(status)
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        """Return the lower-cased class name without its last two characters."""
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        """The value of the 'external_downloader' option."""
        return self.params.get('external_downloader')

    @classmethod
    def available(cls):
        """Check the executable named by get_basename() using AVAILABLE_OPT."""
        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])

    @classmethod
    def supports(cls, info_dict):
        """Whether the protocol in info_dict is one of http(s)/ftp(s)."""
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

    @classmethod
    def can_download(cls, info_dict):
        """Whether the downloader is available and supports info_dict."""
        return cls.available() and cls.supports(info_dict)

    def _option(self, command_option, param):
        return cli_option(self.params, command_option, param)

    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)

    def _valueless_option(self, command_option, param, expected_value=True):
        return cli_valueless_option(self.params, command_option, param, expected_value)

    def _configuration_args(self, default=None):
        """Return the 'external_downloader_args' option via cli_configuration_args.

        *default* falls back to a fresh empty list; ``None`` is used as the
        default value so that no mutable list is shared between calls.
        """
        if default is None:
            default = []
        return cli_configuration_args(self.params, 'external_downloader_args', default)

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(cmd)

        p = subprocess.Popen(
            cmd, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        if p.returncode != 0:
            # Only surface the captured stderr when the program failed
            self.to_stderr(stderr.decode('utf-8', 'replace'))
        return p.returncode
-
-
class CurlFD(ExternalFD):
    """External downloader that builds and runs a curl command line."""

    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the curl argument list for downloading info_dict['url']."""
        command = [self.exe, '--location', '-o', tmpfilename]
        for header, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (header, value)])
        command.extend(self._bool_option('--continue-at', 'continuedl', '-', '0'))
        command.extend(self._valueless_option('--silent', 'noprogress'))
        command.extend(self._valueless_option('--verbose', 'verbose'))
        command.extend(self._option('--limit-rate', 'ratelimit'))
        command.extend(self._option('--retry', 'retries'))
        command.extend(self._option('--max-filesize', 'max_filesize'))
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(self._valueless_option('--insecure', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command

    def _call_downloader(self, tmpfilename, info_dict):
        """Run curl without capturing its output and return its exit code."""
        command = [
            encodeArgument(arg)
            for arg in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(command)

        # curl writes the progress to stderr so don't capture it.
        process = subprocess.Popen(command)
        process.communicate()
        return process.returncode
-
-
class AxelFD(ExternalFD):
    """External downloader that builds an axel command line."""

    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the axel argument list for downloading info_dict['url']."""
        command = [self.exe, '-o', tmpfilename]
        for header, value in info_dict['http_headers'].items():
            command.extend(['-H', '%s: %s' % (header, value)])
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command
-
-
class WgetFD(ExternalFD):
    """External downloader that builds a wget command line."""

    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the wget argument list for downloading info_dict['url']."""
        command = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
        for header, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (header, value)])
        command.extend(self._option('--bind-address', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(
            self._valueless_option('--no-check-certificate', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command
-
-
class Aria2cFD(ExternalFD):
    """External downloader that builds an aria2c command line."""

    AVAILABLE_OPT = '-v'

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the aria2c argument list for downloading info_dict['url']."""
        command = [self.exe, '-c']
        command.extend(self._configuration_args([
            '--min-split-size', '1M', '--max-connection-per-server', '4']))
        # The directory and the file name are passed as separate options
        directory = os.path.dirname(tmpfilename)
        if directory:
            command.extend(['--dir', directory])
        command.extend(['--out', os.path.basename(tmpfilename)])
        for header, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (header, value)])
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--all-proxy', 'proxy'))
        command.extend(self._bool_option(
            '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
        command.extend(['--', info_dict['url']])
        return command
-
-
class HttpieFD(ExternalFD):
    """External downloader that builds a command line for the 'http' program."""

    @classmethod
    def available(cls):
        """Check for the 'http' executable using its --version option."""
        return check_executable('http', ['--version'])

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the 'http --download' argument list for info_dict['url']."""
        command = ['http', '--download', '--output', tmpfilename, info_dict['url']]
        for header, value in info_dict['http_headers'].items():
            command.append('%s:%s' % (header, value))
        return command
-
-
class FFmpegFD(ExternalFD):
    """External downloader that runs the executable found by FFmpegPostProcessor."""

    @classmethod
    def supports(cls, info_dict):
        """Whether the protocol in info_dict is one this downloader handles."""
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')

    @classmethod
    def available(cls):
        """Whether FFmpegPostProcessor reports an available executable."""
        return FFmpegPostProcessor().available

    def _call_downloader(self, tmpfilename, info_dict):
        """Build the command line, run it and return the process exit code.

        Returns a non-zero code when no executable is available.  On
        KeyboardInterrupt the process is asked to quit (except on win32)
        and the interrupt is re-raised.
        """
        url = info_dict['url']
        ffpp = FFmpegPostProcessor(downloader=self)
        if not ffpp.available:
            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
            # Must not be False: False == 0, which ExternalFD.real_download
            # would interpret as a successful download
            return 1
        ffpp.check_version()

        args = [ffpp.executable, '-y']

        for log_level in ('quiet', 'verbose'):
            if self.params.get(log_level, False):
                args += ['-loglevel', log_level]
                break

        seekable = info_dict.get('_seekable')
        if seekable is not None:
            # setting -seekable prevents ffmpeg from guessing if the server
            # supports seeking(by adding the header `Range: bytes=0-`), which
            # can cause problems in some cases
            # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']

        args += self._configuration_args()

        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
        # end_time = info_dict.get('end_time')
        # if end_time:
        #     args += ['-t', compat_str(end_time - start_time)]

        if info_dict['http_headers'] and re.match(r'^https?://', url):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            headers = handle_youtubedl_headers(info_dict['http_headers'])
            args += [
                '-headers',
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]

        env = None
        proxy = self.params.get('proxy')
        if proxy:
            if not re.match(r'^[\da-zA-Z]+://', proxy):
                proxy = 'http://%s' % proxy

            if proxy.startswith('socks'):
                self.report_warning(
                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())

            # Since December 2015 ffmpeg supports -http_proxy option (see
            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
            # We could switch to the following code if we are able to detect version properly
            # args += ['-http_proxy', proxy]
            env = os.environ.copy()
            compat_setenv('HTTP_PROXY', proxy, env=env)
            compat_setenv('http_proxy', proxy, env=env)

        protocol = info_dict.get('protocol')

        if protocol == 'rtmp':
            player_url = info_dict.get('player_url')
            page_url = info_dict.get('page_url')
            app = info_dict.get('app')
            play_path = info_dict.get('play_path')
            tc_url = info_dict.get('tc_url')
            flash_version = info_dict.get('flash_version')
            live = info_dict.get('rtmp_live', False)
            if player_url is not None:
                args += ['-rtmp_swfverify', player_url]
            if page_url is not None:
                args += ['-rtmp_pageurl', page_url]
            if app is not None:
                args += ['-rtmp_app', app]
            if play_path is not None:
                args += ['-rtmp_playpath', play_path]
            if tc_url is not None:
                args += ['-rtmp_tcurl', tc_url]
            if flash_version is not None:
                args += ['-rtmp_flashver', flash_version]
            if live:
                args += ['-rtmp_live', 'live']

        args += ['-i', url, '-c', 'copy']

        if self.params.get('test', False):
            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

        if protocol in ('m3u8', 'm3u8_native'):
            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                args += ['-f', 'mpegts']
            else:
                args += ['-f', 'mp4']
                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                    args += ['-bsf:a', 'aac_adtstoasc']
        elif protocol == 'rtmp':
            args += ['-f', 'flv']
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]

        args = [encodeArgument(opt) for opt in args]
        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))

        self._debug_cmd(args)

        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
        try:
            retval = proc.wait()
        except KeyboardInterrupt:
            # subprocess.run would send the SIGKILL signal to ffmpeg and the
            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
            # produces a file that is playable (this is mostly useful for live
            # streams). Note that Windows is not affected and produces playable
            # files (see https://github.com/rg3/youtube-dl/issues/8300).
            if sys.platform != 'win32':
                proc.communicate(b'q')
            raise
        return retval
-
-
class AVconvFD(FFmpegFD):
    """FFmpegFD under a second class name; nothing is overridden."""
-
-
-_BY_NAME = dict(
- (klass.get_basename(), klass)
- for name, klass in globals().items()
- if name.endswith('FD') and name != 'ExternalFD'
-)
-
-
def list_external_downloaders():
    """Return the sorted list of names registered in _BY_NAME."""
    return sorted(_BY_NAME)
-
-
def get_external_downloader(external_downloader):
    """Return the downloader class registered for the given executable.

    The lookup key is the base name of *external_downloader* without its
    extension (e.g. a trailing .exe on Windows).  Raises KeyError when no
    class is registered under that name.
    """
    executable = os.path.basename(external_downloader)
    # Drop .exe extension on Windows
    name, _extension = os.path.splitext(executable)
    return _BY_NAME[name]
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
deleted file mode 100644
index 15e71be..0000000
--- a/youtube_dl/downloader/f4m.py
+++ /dev/null
@@ -1,438 +0,0 @@
-from __future__ import division, unicode_literals
-
-import io
-import itertools
-import time
-
-from .fragment import FragmentFD
-from ..compat import (
- compat_b64decode,
- compat_etree_fromstring,
- compat_urlparse,
- compat_urllib_error,
- compat_urllib_parse_urlparse,
- compat_struct_pack,
- compat_struct_unpack,
-)
-from ..utils import (
- fix_xml_ampersands,
- xpath_text,
-)
-
-
class DataTruncatedError(Exception):
    """Raised by FlvReader.read_bytes when fewer bytes remain than requested."""
-
-
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html
    """

    def read_bytes(self, n):
        """Read exactly `n` bytes; raise DataTruncatedError if fewer remain."""
        data = self.read(n)
        if len(data) < n:
            raise DataTruncatedError(
                'FlvReader error: need %d bytes while only %d bytes got' % (
                    n, len(data)))
        return data

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Read 8 bytes and unpack them with the '!Q' format."""
        return compat_struct_unpack('!Q', self.read_bytes(8))[0]

    def read_unsigned_int(self):
        """Read 4 bytes and unpack them with the '!I' format."""
        return compat_struct_unpack('!I', self.read_bytes(4))[0]

    def read_unsigned_char(self):
        """Read 1 byte and unpack it with the '!B' format."""
        return compat_struct_unpack('!B', self.read_bytes(1))[0]

    def read_string(self):
        """Read bytes up to a NUL terminator and return them without it.

        Raises DataTruncatedError (via read_bytes) if the data ends before
        a terminator is found.
        """
        # Collect the pieces and join once instead of growing a bytes
        # object with `+=`, which copies the accumulated data every time.
        chars = []
        while True:
            char = self.read_bytes(1)
            if char == b'\x00':
                break
            chars.append(char)
        return b''.join(chars)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read_bytes(4)
        header_end = 8
        if size == 1:
            # A 32-bit size of 1 means the real size follows as a 64-bit
            # value, which makes the header 16 bytes long instead of 8
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read_bytes(real_size - header_end)

    def read_asrt(self):
        """Parse an 'asrt' box body; return {'segment_run': [(first_segment,
        fragments_per_segment), ...]}."""
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for _ in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for _ in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """Parse an 'afrt' box body; return {'fragments': [entry, ...]} where
        each entry has 'first', 'ts', 'duration' and
        'discontinuity_indicator' keys."""
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for _ in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for _ in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present for zero-duration entries
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """Parse an 'abst' box body.

        Returns a dict with 'segments' (parsed asrt boxes), 'fragments'
        (parsed afrt boxes) and 'live' (bool taken from the flags byte).
        The nested box types are checked with `assert`, so an unexpected
        type raises AssertionError (and is not checked at all under -O).
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        flags = self.read_unsigned_char()
        live = (flags & 0x20) != 0
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for _ in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for _ in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for _ in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for _ in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
            'live': live,
        }

    def read_bootstrap_info(self):
        """Read one box, require it to be 'abst', and return its parsed body."""
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
-
-
def read_bootstrap_info(bootstrap_bytes):
    """Wrap `bootstrap_bytes` in a FlvReader and return its bootstrap info."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
-
-
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    pairs = []
    segment_run_table = boot_info['segments'][0]
    fragment_entries = boot_info['fragments'][0]['fragments']
    # Fragment numbers run consecutively from the first entry's number
    frag_numbers = itertools.count(fragment_entries[0]['first'])
    for segment, frag_count in segment_run_table['segment_run']:
        # In some live HDS streams (for example Rai), the fragment count is
        # abnormal and causing out-of-memory errors. It's OK to change the
        # number of fragments for live streams as they are updated periodically
        if frag_count == 0xFFFFFFFF and boot_info['live']:
            frag_count = 2
        pairs.extend((segment, next(frag_numbers)) for _ in range(frag_count))

    # For live streams only the last two fragments are kept
    return pairs[-2:] if boot_info['live'] else pairs
-
-
def write_unsigned_int(stream, val):
    """Write `val` to `stream`, packed with the '!I' struct format."""
    packed = compat_struct_pack('!I', val)
    stream.write(packed)
-
-
def write_unsigned_int_24(stream, val):
    """Pack `val` with the '!I' format and write all but its first byte."""
    packed = compat_struct_pack('!I', val)
    stream.write(packed[1:])
-
-
def write_flv_header(stream):
    """Writes the FLV header to stream"""
    # FLV header, emitted as four consecutive writes
    header_parts = (
        b'FLV\x01',
        b'\x05',
        b'\x00\x00\x00\x09',
        b'\x00\x00\x00\x00',
    )
    for part in header_parts:
        stream.write(part)
-
-
def write_metadata_tag(stream, metadata):
    """Writes optional metadata tag to stream"""
    # Nothing is written when there is no metadata
    if not metadata:
        return

    script_tag = b'\x12'
    flv_tag_header_len = 11
    payload_len = len(metadata)

    stream.write(script_tag)
    write_unsigned_int_24(stream, payload_len)
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Trailing size field: tag header length plus payload length
    write_unsigned_int(stream, flv_tag_header_len + payload_len)
-
-
def remove_encrypted_media(media):
    """Return a list of the elements of `media` whose attributes contain
    neither 'drmAdditionalHeaderId' nor 'drmAdditionalHeaderSetId'."""
    drm_attrs = ('drmAdditionalHeaderId', 'drmAdditionalHeaderSetId')
    return [
        elem for elem in media
        if not any(attr in elem.attrib for attr in drm_attrs)]
-
-
-def _add_ns(prop, ver=1):
- return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
-
-
def get_base_url(manifest):
    """Look up the manifest's baseURL (f4m namespace version 1 or 2).

    A non-empty value is returned stripped of surrounding whitespace;
    otherwise whatever falsy value the lookup produced is returned as is.
    """
    candidates = [_add_ns('baseURL'), _add_ns('baseURL', 2)]
    base_url = xpath_text(manifest, candidates, 'base URL', default=None)
    return base_url.strip() if base_url else base_url
-
-
class F4mFD(FragmentFD):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    FD_NAME = 'f4m'

    def _get_unencrypted_media(self, doc):
        """Return the manifest's media nodes that reference no DRM header,
        reporting an error when none are found or none remain."""
        media = doc.findall(_add_ns('media'))
        if not media:
            self.report_error('No media found')
        drm_headers = (
            doc.findall(_add_ns('drmAdditionalHeader')) +
            doc.findall(_add_ns('drmAdditionalHeaderSet')))
        for header in drm_headers:
            # If id attribute is missing it's valid for all media nodes
            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
            if 'id' not in header.attrib:
                self.report_error('Missing ID in f4m DRM')
        media = remove_encrypted_media(media)
        if not media:
            self.report_error('Unsupported DRM')
        return media

    def _get_bootstrap_from_url(self, bootstrap_url):
        """Fetch `bootstrap_url` and parse the response as bootstrap info."""
        raw_bootstrap = self.ydl.urlopen(bootstrap_url).read()
        return read_bootstrap_info(raw_bootstrap)

    def _update_live_fragments(self, bootstrap_url, latest_fragment):
        """Poll the bootstrap URL for fragments newer than `latest_fragment`.

        Up to 30 attempts are made with a 5 second sleep after each attempt
        that yields nothing; an error is reported if all of them come up
        empty. The (possibly empty) list of new fragments is returned.
        """
        fragments_list = []
        for _ in range(30):
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
            fragments_list = [
                frag for frag in build_fragments_list(boot_info)
                if frag[1] > latest_fragment]
            if fragments_list:
                break
            # Retry after a while
            time.sleep(5.0)

        if not fragments_list:
            self.report_error('Failed to update fragments')

        return fragments_list

    def _parse_bootstrap_node(self, node, base_url):
        """Return (boot_info, bootstrap_url) for a bootstrapInfo node;
        bootstrap_url is None when the inline payload was used."""
        # Sometimes non empty inline bootstrap info can be specified along
        # with bootstrap url attribute (e.g. dummy inline bootstrap info
        # contains whitespace characters in [1]). We will prefer bootstrap
        # url over inline bootstrap info when present.
        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
        bootstrap_url = node.get('url')
        if not bootstrap_url:
            inline_bootstrap = compat_b64decode(node.text)
            return read_bootstrap_info(inline_bootstrap), None
        bootstrap_url = compat_urlparse.urljoin(base_url, bootstrap_url)
        return self._get_bootstrap_from_url(bootstrap_url), bootstrap_url

    def real_download(self, filename, info_dict):
        """Download the f4m stream described by `info_dict` into `filename`.

        Returns True on completion and False when a fragment download
        reports failure.
        """
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/rg3/youtube-dl/issues/7823)
        manifest_text = urlh.read().decode('utf-8', 'ignore')
        manifest = fix_xml_ampersands(manifest_text).strip()

        doc = compat_etree_fromstring(manifest)
        formats = [
            (int(node.attrib.get('bitrate', -1)), node)
            for node in self._get_unencrypted_media(doc)]
        if requested_bitrate is None or len(formats) == 1:
            # get the best format
            _rate, media = sorted(formats, key=lambda fmt: fmt[0])[-1]
        else:
            matching = [
                fmt for fmt in formats if int(fmt[0]) == requested_bitrate]
            _rate, media = matching[0]

        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
        man_base_url = get_base_url(doc) or man_url

        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        boot_info, bootstrap_url = self._parse_bootstrap_node(
            bootstrap_node, man_base_url)
        live = boot_info['live']
        metadata_node = media.find(_add_ns('metadata'))
        metadata = (
            compat_b64decode(metadata_node.text)
            if metadata_node is not None else None)

        fragments_list = build_fragments_list(boot_info)
        test = self.params.get('test', False)
        if test:
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        ctx = {
            'filename': filename,
            'total_frags': total_frags,
            'live': live,
        }

        self._prepare_frag_download(ctx)

        dest_stream = ctx['dest_stream']

        # The header is only written when nothing has been downloaded yet
        if ctx['complete_frags_downloaded_bytes'] == 0:
            write_flv_header(dest_stream)
            if not live:
                write_metadata_tag(dest_stream, metadata)

        base_url_parsed = compat_urllib_parse_urlparse(base_url)

        self._start_frag_download(ctx)

        frag_index = 0
        while fragments_list:
            seg_i, frag_i = fragments_list.pop(0)
            frag_index += 1
            # Skip fragments already accounted for by the resume index
            if frag_index <= ctx['fragment_index']:
                continue
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            query_parts = []
            if base_url_parsed.query:
                query_parts.append(base_url_parsed.query)
            if akamai_pv:
                query_parts.append(akamai_pv.strip(';'))
            extra_param = info_dict.get('extra_param_to_segment_url')
            if extra_param:
                query_parts.append(extra_param)
            url_parsed = base_url_parsed._replace(
                path=base_url_parsed.path + name,
                query='&'.join(query_parts))
            try:
                success, down_data = self._download_fragment(
                    ctx, url_parsed.geturl(), info_dict)
                if not success:
                    return False
                reader = FlvReader(down_data)
                # Walk the boxes until the 'mdat' box is found and appended
                while True:
                    try:
                        _, box_type, box_data = reader.read_box_info()
                    except DataTruncatedError:
                        if not test:
                            raise
                        # In tests, segments may be truncated, and thus
                        # FlvReader may not be able to parse the whole
                        # chunk. If so, write the segment as is
                        # See https://github.com/rg3/youtube-dl/issues/9214
                        dest_stream.write(down_data)
                        break
                    if box_type == b'mdat':
                        self._append_fragment(ctx, box_data)
                        break
            except (compat_urllib_error.HTTPError, ) as err:
                if not (live and err.code in (404, 410)):
                    raise
                # We didn't keep up with the live window. Continue
                # with the next available fragment.
                self.report_warning('Fragment %d unavailable' % frag_i)
                fragments_list = []

            if not fragments_list and not test and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
                    missed = fragments_list[0][1] - (frag_i + 1)
                    self.report_warning('Missed %d fragments' % missed)

        self._finish_frag_download(ctx)

        return True
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
deleted file mode 100644
index 917f6dc..0000000
--- a/youtube_dl/downloader/fragment.py
+++ /dev/null
@@ -1,268 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import time
-import json
-
-from .common import FileDownloader
-from .http import HttpFD
-from ..utils import (
- error_to_compat_str,
- encodeFilename,
- sanitize_open,
- sanitized_Request,
-)
-
-
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen discards every message."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and do nothing."""
        return None
-
-
class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
                        finished

    For each incomplete fragment download youtube-dl keeps on disk a special
    bookkeeping file with download state and metadata (in future such files will
    be used for any incomplete download handled by youtube-dl). This file is
    used to properly handle resuming, check download file consistency and detect
    potential errors. The file has a .ytdl extension and represents a standard
    JSON file of the following format:

    extractor:
        Dictionary of extractor related data. TBD.

    downloader:
        Dictionary of downloader related data. May contain following data:
            current_fragment:
                Dictionary with current (being downloaded) fragment data:
                index:  0-based index of current fragment among all fragments
            fragment_count:
                Total count of fragments

    This feature is experimental and file format may change in future.
    """

    def report_retry_fragment(self, err, frag_index, count, retries):
        """Print a message announcing retry `count` of fragment `frag_index`."""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))

    def report_skip_fragment(self, frag_index):
        """Print a message announcing that fragment `frag_index` is skipped."""
        self.to_screen('[download] Skipping fragment %d...' % frag_index)

    def _prepare_url(self, info_dict, url):
        """Return a request carrying info_dict's 'http_headers' when there
        are any, otherwise the bare `url`."""
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download and _start_frag_download in sequence."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    @staticmethod
    def __do_ytdl_file(ctx):
        # The bookkeeping file is used only for non-live downloads whose
        # temporary filename is not '-'
        return not ctx['live'] and not ctx['tmpfilename'] == '-'

    def _read_ytdl_file(self, ctx):
        """Load the fragment index from the .ytdl file into ctx.

        Any failure while reading or decoding sets ctx['ytdl_corrupt']
        instead of raising.
        """
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
        except Exception:
            ctx['ytdl_corrupt'] = True
        finally:
            stream.close()

    def _write_ytdl_file(self, ctx):
        """Write the current fragment index (and the fragment count, when
        ctx has one) to the .ytdl file."""
        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
        # Close the stream even if serialisation or the write fails
        try:
            downloader = {
                'current_fragment': {
                    'index': ctx['fragment_index'],
                },
            }
            if ctx.get('fragment_count') is not None:
                downloader['fragment_count'] = ctx['fragment_count']
            frag_index_stream.write(json.dumps({'downloader': downloader}))
        finally:
            frag_index_stream.close()

    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
        """Download one fragment to its own file and read it back.

        Returns (True, content) on success and (False, None) when the
        underlying downloader reports failure. The sanitized fragment
        filename is stored in ctx['fragment_filename_sanitized'].
        """
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        success = ctx['dl'].download(fragment_filename, {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        })
        if not success:
            return False, None
        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        # Close the fragment file even if reading it fails
        try:
            frag_content = down.read()
        finally:
            down.close()
        return True, frag_content

    def _append_fragment(self, ctx, frag_content):
        """Append `frag_content` to the destination stream.

        Whether or not the write succeeds, the .ytdl file is updated (when
        in use) and the fragment file is removed unless 'keep_fragments'
        is set.
        """
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        """Set up ctx for a fragmented download.

        Announces the fragment count, creates the quiet HTTP downloader,
        works out the resume state from the temporary file and the .ytdl
        file, opens the destination stream and stores 'dl', 'dest_stream',
        'tmpfilename', 'fragment_index' and
        'complete_frags_downloaded_bytes' in ctx.
        """
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                # A recorded fragment index with no data on disk cannot be resumed
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })

    def _start_frag_download(self, ctx):
        """Install the per-fragment progress hook on ctx['dl'] and return
        the start time, which is also stored in ctx['started']."""
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            if not ctx['live']:
                # Extrapolate the total size from the average fragment size so far
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
                    (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size,
                        state['downloaded_bytes'])
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """Close the destination stream, remove the .ytdl file, rename the
        temporary file into place and emit the final 'finished' progress."""
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
            'total_bytes': downloaded_bytes,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
deleted file mode 100644
index fd30452..0000000
--- a/youtube_dl/downloader/hls.py
+++ /dev/null
@@ -1,204 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import binascii
-try:
- from Crypto.Cipher import AES
- can_decrypt_frag = True
-except ImportError:
- can_decrypt_frag = False
-
-from .fragment import FragmentFD
-from .external import FFmpegFD
-
-from ..compat import (
- compat_urllib_error,
- compat_urlparse,
- compat_struct_pack,
-)
-from ..utils import (
- parse_m3u8_attributes,
- update_url_query,
-)
-
-
class HlsFD(FragmentFD):
    """ A limited implementation that does not require ffmpeg """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict):
        """Return True when the manifest uses only features this downloader
        handles and info_dict is not marked as live."""
        UNSUPPORTED_FEATURES = (
            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]

            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
        )
        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
        # AES-128 needs the optional Crypto import to have succeeded
        check_results.append(can_decrypt_frag or not is_aes128_enc)
        check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
        check_results.append(not info_dict.get('is_live'))
        return all(check_results)

    def real_download(self, filename, info_dict):
        """Download the HLS stream described by `info_dict` into `filename`.

        Delegates to FFmpegFD when can_download rejects the manifest.
        Returns True on completion and False when the download is abandoned.
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        if not self.can_download(s, info_dict):
            if info_dict.get('extra_param_to_segment_url'):
                self.report_error('pycrypto not found. Please install it.')
                return False
            self.report_warning(
                'hlsnative has detected features it does not support, '
                'extraction will be delegated to ffmpeg')
            fd = FFmpegFD(self.ydl, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        def is_ad_fragment(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        # First pass: count media and ad fragments for progress reporting
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment(line):
                    ad_frags += 1
                    ad_frag_next = True
                continue
            if ad_frag_next:
                ad_frag_next = False
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)

        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        frag_index = 0
        ad_frag_next = False
        # Second pass: download every media fragment, tracking tag state
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    if ad_frag_next:
                        ad_frag_next = False
                        continue
                    frag_index += 1
                    # Skip fragments already accounted for by the resume index
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = (
                        line
                        if re.match(r'^https?://', line)
                        else compat_urlparse.urljoin(man_url, line))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)
                    count = 0
                    # Work on a copy so the Range header added below does
                    # not leak into info_dict['http_headers']
                    headers = dict(info_dict.get('http_headers') or {})
                    if byte_range:
                        # byte_range['end'] is exclusive (start + length)
                        # while the last position in an HTTP Range header
                        # is inclusive
                        headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
                    while count <= fragment_retries:
                        try:
                            success, frag_content = self._download_fragment(
                                ctx, frag_url, info_dict, headers)
                            if not success:
                                return False
                            break
                        except compat_urllib_error.HTTPError as err:
                            # Unavailable (possibly temporary) fragments may be served.
                            # First we try to retry then either skip or abort.
                            # See https://github.com/rg3/youtube-dl/issues/10165,
                            # https://github.com/rg3/youtube-dl/issues/10448).
                            count += 1
                            if count <= fragment_retries:
                                self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    if count > fragment_retries:
                        if skip_unavailable_fragments:
                            media_sequence += 1
                            self.report_skip_fragment(frag_index)
                            continue
                        self.report_error(
                            'giving up after %s fragment retries' % fragment_retries)
                        return False
                    if decrypt_info['METHOD'] == 'AES-128':
                        # Without an explicit IV the media sequence number is used
                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                        # The key is fetched once and cached until the key URI changes
                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                            self._prepare_url(info_dict, decrypt_info['URI'])).read()
                        frag_content = AES.new(
                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                    self._append_fragment(ctx, frag_content)
                    # We only download the first fragment during the test
                    if test:
                        break
                    media_sequence += 1
                elif line.startswith('#EXT-X-KEY'):
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if 'IV' in decrypt_info:
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if not re.match(r'^https?://', decrypt_info['URI']):
                            decrypt_info['URI'] = compat_urlparse.urljoin(
                                man_url, decrypt_info['URI'])
                        if extra_query:
                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                        if decrypt_url != decrypt_info['URI']:
                            decrypt_info['KEY'] = None
                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    splitted_byte_range = line[17:].split('@')
                    # Without an explicit offset the range starts where the previous one ended
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
                elif is_ad_fragment(line):
                    ad_frag_next = True

        self._finish_frag_download(ctx)

        return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
deleted file mode 100644
index 5b1e960..0000000
--- a/youtube_dl/downloader/http.py
+++ /dev/null
@@ -1,354 +0,0 @@
-from __future__ import unicode_literals
-
-import errno
-import os
-import socket
-import time
-import random
-import re
-
-from .common import FileDownloader
-from ..compat import (
- compat_str,
- compat_urllib_error,
-)
-from ..utils import (
- ContentTooShortError,
- encodeFilename,
- int_or_none,
- sanitize_open,
- sanitized_Request,
- write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
-)
-
-
-class HttpFD(FileDownloader):
- def real_download(self, filename, info_dict):
- url = info_dict['url']
-
- class DownloadContext(dict):
- __getattr__ = dict.get
- __setattr__ = dict.__setitem__
- __delattr__ = dict.__delitem__
-
- ctx = DownloadContext()
- ctx.filename = filename
- ctx.tmpfilename = self.temp_name(filename)
- ctx.stream = None
-
- # Do not include the Accept-Encoding header
- headers = {'Youtubedl-no-compression': 'True'}
- add_headers = info_dict.get('http_headers')
- if add_headers:
- headers.update(add_headers)
-
- is_test = self.params.get('test', False)
- chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size') or
- self.params.get('http_chunk_size') or 0)
-
- ctx.open_mode = 'wb'
- ctx.resume_len = 0
- ctx.data_len = None
- ctx.block_size = self.params.get('buffersize', 1024)
- ctx.start_time = time.time()
- ctx.chunk_size = None
-
- if self.params.get('continuedl', True):
- # Establish possible resume length
- if os.path.isfile(encodeFilename(ctx.tmpfilename)):
- ctx.resume_len = os.path.getsize(
- encodeFilename(ctx.tmpfilename))
-
- ctx.is_resume = ctx.resume_len > 0
-
- count = 0
- retries = self.params.get('retries', 0)
-
- class SucceedDownload(Exception):
- pass
-
- class RetryDownload(Exception):
- def __init__(self, source_error):
- self.source_error = source_error
-
- class NextFragment(Exception):
- pass
-
- def set_range(req, start, end):
- range_header = 'bytes=%d-' % start
- if end:
- range_header += compat_str(end)
- req.add_header('Range', range_header)
-
- def establish_connection():
- ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
- if not is_test and chunk_size else chunk_size)
- if ctx.resume_len > 0:
- range_start = ctx.resume_len
- if ctx.is_resume:
- self.report_resuming_byte(ctx.resume_len)
- ctx.open_mode = 'ab'
- elif ctx.chunk_size > 0:
- range_start = 0
- else:
- range_start = None
- ctx.is_resume = False
- range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
- if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
- range_end = ctx.data_len - 1
- has_range = range_start is not None
- ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
- if has_range:
- set_range(request, range_start, range_end)
- # Establish connection
- try:
- ctx.data = self.ydl.urlopen(request)
- # When trying to resume, Content-Range HTTP header of response has to be checked
- # to match the value of requested Range HTTP header. This is due to a webservers
- # that don't support resuming and serve a whole file with no Content-Range
- # set in response despite of requested Range (see
- # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
- if has_range:
- content_range = ctx.data.headers.get('Content-Range')
- if content_range:
- content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
- # Content-Range is present and matches requested Range, resume is possible
- if content_range_m:
- if range_start == int(content_range_m.group(1)):
- content_range_end = int_or_none(content_range_m.group(2))
- content_len = int_or_none(content_range_m.group(3))
- accept_content_len = (
- # Non-chunked download
- not ctx.chunk_size or
- # Chunked download and requested piece or
- # its part is promised to be served
- content_range_end == range_end or
- content_len < range_end)
- if accept_content_len:
- ctx.data_len = content_len
- return
- # Content-Range is either not present or invalid. Assuming remote webserver is
- # trying to send the whole file, resume is not possible, so wiping the local file
- # and performing entire redownload
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
- return
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code == 416:
- # Unable to resume (requested range not satisfiable)
- try:
- # Open the connection again without the range header
- ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
- content_length = ctx.data.info()['Content-Length']
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code < 500 or err.code >= 600:
- raise
- else:
- # Examine the reported length
- if (content_length is not None and
- (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
- # The file had already been fully downloaded.
- # Explanation to the above condition: in issue #175 it was revealed that
- # YouTube sometimes adds or removes a few bytes from the end of the file,
- # changing the file size slightly and causing problems for some users. So
- # I decided to implement a suggested change and consider the file
- # completely downloaded if the file size differs less than 100 bytes from
- # the one in the hard drive.
- self.report_file_already_downloaded(ctx.filename)
- self.try_rename(ctx.tmpfilename, ctx.filename)
- self._hook_progress({
- 'filename': ctx.filename,
- 'status': 'finished',
- 'downloaded_bytes': ctx.resume_len,
- 'total_bytes': ctx.resume_len,
- })
- raise SucceedDownload()
- else:
- # The length does not match, we start the download over
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- return
- elif err.code < 500 or err.code >= 600:
- # Unexpected HTTP error
- raise
- raise RetryDownload(err)
- except socket.error as err:
- if err.errno != errno.ECONNRESET:
- # Connection reset is no problem, just retry
- raise
- raise RetryDownload(err)
-
- def download():
- data_len = ctx.data.info().get('Content-length', None)
-
- # Range HTTP header may be ignored/unsupported by a webserver
- # (e.g. extractor/scivee.py, extractor/bambuser.py).
- # However, for a test we still would like to download just a piece of a file.
- # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
- # block size when downloading a file.
- if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
- data_len = self._TEST_FILE_SIZE
-
- if data_len is not None:
- data_len = int(data_len) + ctx.resume_len
- min_data_len = self.params.get('min_filesize')
- max_data_len = self.params.get('max_filesize')
- if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
- return False
- if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
- return False
-
- byte_counter = 0 + ctx.resume_len
- block_size = ctx.block_size
- start = time.time()
-
- # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
- now = None # needed for slow_down() in the first loop run
- before = start # start measuring
-
- def retry(e):
- to_stdout = ctx.tmpfilename == '-'
- if not to_stdout:
- ctx.stream.close()
- ctx.stream = None
- ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
- raise RetryDownload(e)
-
- while True:
- try:
- # Download and write
- data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- # socket.timeout is a subclass of socket.error but may not have
- # errno set
- except socket.timeout as e:
- retry(e)
- except socket.error as e:
- if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
- raise
- retry(e)
-
- byte_counter += len(data_block)
-
- # exit loop when download is finished
- if len(data_block) == 0:
- break
-
- # Open destination file just in time
- if ctx.stream is None:
- try:
- ctx.stream, ctx.tmpfilename = sanitize_open(
- ctx.tmpfilename, ctx.open_mode)
- assert ctx.stream is not None
- ctx.filename = self.undo_temp_name(ctx.tmpfilename)
- self.report_destination(ctx.filename)
- except (OSError, IOError) as err:
- self.report_error('unable to open for writing: %s' % str(err))
- return False
-
- if self.params.get('xattr_set_filesize', False) and data_len is not None:
- try:
- write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
- except (XAttrUnavailableError, XAttrMetadataError) as err:
- self.report_error('unable to set filesize xattr: %s' % str(err))
-
- try:
- ctx.stream.write(data_block)
- except (IOError, OSError) as err:
- self.to_stderr('\n')
- self.report_error('unable to write data: %s' % str(err))
- return False
-
- # Apply rate limit
- self.slow_down(start, now, byte_counter - ctx.resume_len)
-
- # end measuring of one loop run
- now = time.time()
- after = now
-
- # Adjust block size
- if not self.params.get('noresizebuffer', False):
- block_size = self.best_block_size(after - before, len(data_block))
-
- before = after
-
- # Progress message
- speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
- if ctx.data_len is None:
- eta = None
- else:
- eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
-
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': byte_counter,
- 'total_bytes': ctx.data_len,
- 'tmpfilename': ctx.tmpfilename,
- 'filename': ctx.filename,
- 'eta': eta,
- 'speed': speed,
- 'elapsed': now - ctx.start_time,
- })
-
- if is_test and byte_counter == data_len:
- break
-
- if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
- ctx.resume_len = byte_counter
- # ctx.block_size = block_size
- raise NextFragment()
-
- if ctx.stream is None:
- self.to_stderr('\n')
- self.report_error('Did not get any data blocks')
- return False
- if ctx.tmpfilename != '-':
- ctx.stream.close()
-
- if data_len is not None and byte_counter != data_len:
- err = ContentTooShortError(byte_counter, int(data_len))
- if count <= retries:
- retry(err)
- raise err
-
- self.try_rename(ctx.tmpfilename, ctx.filename)
-
- # Update file modification time
- if self.params.get('updatetime', True):
- info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
-
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
- 'filename': ctx.filename,
- 'status': 'finished',
- 'elapsed': time.time() - ctx.start_time,
- })
-
- return True
-
- while count <= retries:
- try:
- establish_connection()
- return download()
- except RetryDownload as e:
- count += 1
- if count <= retries:
- self.report_retry(e.source_error, count, retries)
- continue
- except NextFragment:
- continue
- except SucceedDownload:
- return True
-
- self.report_error('giving up after %s retries' % retries)
- return False
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
deleted file mode 100644
index 063fcf4..0000000
--- a/youtube_dl/downloader/ism.py
+++ /dev/null
@@ -1,259 +0,0 @@
-from __future__ import unicode_literals
-
-import time
-import binascii
-import io
-
-from .fragment import FragmentFD
-from ..compat import (
- compat_Struct,
- compat_urllib_error,
-)
-
-
-u8 = compat_Struct('>B')
-u88 = compat_Struct('>Bx')
-u16 = compat_Struct('>H')
-u1616 = compat_Struct('>Hxx')
-u32 = compat_Struct('>I')
-u64 = compat_Struct('>Q')
-
-s88 = compat_Struct('>bx')
-s16 = compat_Struct('>h')
-s1616 = compat_Struct('>hxx')
-s32 = compat_Struct('>i')
-
-unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
-
-TRACK_ENABLED = 0x1
-TRACK_IN_MOVIE = 0x2
-TRACK_IN_PREVIEW = 0x4
-
-SELF_CONTAINED = 0x1
-
-
-def box(box_type, payload):
- return u32.pack(8 + len(payload)) + box_type + payload
-
-
-def full_box(box_type, version, flags, payload):
- return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
-
-
-def write_piff_header(stream, params):
- track_id = params['track_id']
- fourcc = params['fourcc']
- duration = params['duration']
- timescale = params.get('timescale', 10000000)
- language = params.get('language', 'und')
- height = params.get('height', 0)
- width = params.get('width', 0)
- is_audio = width == 0 and height == 0
- creation_time = modification_time = int(time.time())
-
- ftyp_payload = b'isml' # major brand
- ftyp_payload += u32.pack(1) # minor version
- ftyp_payload += b'piff' + b'iso2' # compatible brands
- stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
-
- mvhd_payload = u64.pack(creation_time)
- mvhd_payload += u64.pack(modification_time)
- mvhd_payload += u32.pack(timescale)
- mvhd_payload += u64.pack(duration)
- mvhd_payload += s1616.pack(1) # rate
- mvhd_payload += s88.pack(1) # volume
- mvhd_payload += u16.pack(0) # reserved
- mvhd_payload += u32.pack(0) * 2 # reserved
- mvhd_payload += unity_matrix
- mvhd_payload += u32.pack(0) * 6 # pre defined
- mvhd_payload += u32.pack(0xffffffff) # next track id
- moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
-
- tkhd_payload = u64.pack(creation_time)
- tkhd_payload += u64.pack(modification_time)
- tkhd_payload += u32.pack(track_id) # track id
- tkhd_payload += u32.pack(0) # reserved
- tkhd_payload += u64.pack(duration)
- tkhd_payload += u32.pack(0) * 2 # reserved
- tkhd_payload += s16.pack(0) # layer
- tkhd_payload += s16.pack(0) # alternate group
- tkhd_payload += s88.pack(1 if is_audio else 0) # volume
- tkhd_payload += u16.pack(0) # reserved
- tkhd_payload += unity_matrix
- tkhd_payload += u1616.pack(width)
- tkhd_payload += u1616.pack(height)
- trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
-
- mdhd_payload = u64.pack(creation_time)
- mdhd_payload += u64.pack(modification_time)
- mdhd_payload += u32.pack(timescale)
- mdhd_payload += u64.pack(duration)
- mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
- mdhd_payload += u16.pack(0) # pre defined
- mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
-
- hdlr_payload = u32.pack(0) # pre defined
- hdlr_payload += b'soun' if is_audio else b'vide' # handler type
- hdlr_payload += u32.pack(0) * 3 # reserved
- hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
- mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
-
- if is_audio:
- smhd_payload = s88.pack(0) # balance
- smhd_payload += u16.pack(0) # reserved
- media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
- else:
- vmhd_payload = u16.pack(0) # graphics mode
- vmhd_payload += u16.pack(0) * 3 # opcolor
- media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
- minf_payload = media_header_box
-
- dref_payload = u32.pack(1) # entry count
- dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
- dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
- minf_payload += box(b'dinf', dinf_payload) # Data Information Box
-
- stsd_payload = u32.pack(1) # entry count
-
- sample_entry_payload = u8.pack(0) * 6 # reserved
- sample_entry_payload += u16.pack(1) # data reference index
- if is_audio:
- sample_entry_payload += u32.pack(0) * 2 # reserved
- sample_entry_payload += u16.pack(params.get('channels', 2))
- sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
- sample_entry_payload += u16.pack(0) # pre defined
- sample_entry_payload += u16.pack(0) # reserved
- sample_entry_payload += u1616.pack(params['sampling_rate'])
-
- if fourcc == 'AACL':
- sample_entry_box = box(b'mp4a', sample_entry_payload)
- else:
- sample_entry_payload += u16.pack(0) # pre defined
- sample_entry_payload += u16.pack(0) # reserved
- sample_entry_payload += u32.pack(0) * 3 # pre defined
- sample_entry_payload += u16.pack(width)
- sample_entry_payload += u16.pack(height)
- sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
- sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
- sample_entry_payload += u32.pack(0) # reserved
- sample_entry_payload += u16.pack(1) # frame count
- sample_entry_payload += u8.pack(0) * 32 # compressor name
- sample_entry_payload += u16.pack(0x18) # depth
- sample_entry_payload += s16.pack(-1) # pre defined
-
- codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
- if fourcc in ('H264', 'AVC1'):
- sps, pps = codec_private_data.split(u32.pack(1))[1:]
- avcc_payload = u8.pack(1) # configuration version
- avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
- avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
- avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
- avcc_payload += u16.pack(len(sps))
- avcc_payload += sps
- avcc_payload += u8.pack(1) # number of pps
- avcc_payload += u16.pack(len(pps))
- avcc_payload += pps
- sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
- sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
- stsd_payload += sample_entry_box
-
- stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
-
- stts_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
-
- stsc_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
-
- stco_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
-
- minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
-
- mdia_payload += box(b'minf', minf_payload) # Media Information Box
-
- trak_payload += box(b'mdia', mdia_payload) # Media Box
-
- moov_payload += box(b'trak', trak_payload) # Track Box
-
- mehd_payload = u64.pack(duration)
- mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
-
- trex_payload = u32.pack(track_id) # track id
- trex_payload += u32.pack(1) # default sample description index
- trex_payload += u32.pack(0) # default sample duration
- trex_payload += u32.pack(0) # default sample size
- trex_payload += u32.pack(0) # default sample flags
- mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
-
- moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
- stream.write(box(b'moov', moov_payload)) # Movie Box
-
-
-def extract_box_data(data, box_sequence):
- data_reader = io.BytesIO(data)
- while True:
- box_size = u32.unpack(data_reader.read(4))[0]
- box_type = data_reader.read(4)
- if box_type == box_sequence[0]:
- box_data = data_reader.read(box_size - 8)
- if len(box_sequence) == 1:
- return box_data
- return extract_box_data(box_data, box_sequence[1:])
- data_reader.seek(box_size - 8, 1)
-
-
-class IsmFD(FragmentFD):
- """
- Download segments in a ISM manifest
- """
-
- FD_NAME = 'ism'
-
- def real_download(self, filename, info_dict):
- segments = info_dict['fragments'][:1] if self.params.get(
- 'test', False) else info_dict['fragments']
-
- ctx = {
- 'filename': filename,
- 'total_frags': len(segments),
- }
-
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
- track_written = False
- frag_index = 0
- for i, segment in enumerate(segments):
- frag_index += 1
- if frag_index <= ctx['fragment_index']:
- continue
- count = 0
- while count <= fragment_retries:
- try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
- if not success:
- return False
- if not track_written:
- tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
- info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
- write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
- track_written = True
- self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- self.report_skip_fragment(frag_index)
- continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
-
- self._finish_frag_download(ctx)
-
- return True
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
deleted file mode 100644
index fbb7f51..0000000
--- a/youtube_dl/downloader/rtmp.py
+++ /dev/null
@@ -1,214 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-import re
-import subprocess
-import time
-
-from .common import FileDownloader
-from ..compat import compat_str
-from ..utils import (
- check_executable,
- encodeFilename,
- encodeArgument,
- get_exe_version,
-)
-
-
-def rtmpdump_version():
- return get_exe_version(
- 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
-
-
-class RtmpFD(FileDownloader):
- def real_download(self, filename, info_dict):
- def run_rtmpdump(args):
- start = time.time()
- resume_percent = None
- resume_downloaded_data_len = None
- proc = subprocess.Popen(args, stderr=subprocess.PIPE)
- cursor_in_new_line = True
- proc_stderr_closed = False
- try:
- while not proc_stderr_closed:
- # read line from stderr
- line = ''
- while True:
- char = proc.stderr.read(1)
- if not char:
- proc_stderr_closed = True
- break
- if char in [b'\r', b'\n']:
- break
- line += char.decode('ascii', 'replace')
- if not line:
- # proc_stderr_closed is True
- continue
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
- if mobj:
- downloaded_data_len = int(float(mobj.group(1)) * 1024)
- percent = float(mobj.group(2))
- if not resume_percent:
- resume_percent = percent
- resume_downloaded_data_len = downloaded_data_len
- time_now = time.time()
- eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
- data_len = None
- if percent > 0:
- data_len = int(downloaded_data_len * 100 / percent)
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': downloaded_data_len,
- 'total_bytes_estimate': data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'eta': eta,
- 'elapsed': time_now - start,
- 'speed': speed,
- })
- cursor_in_new_line = False
- else:
- # no percent for live streams
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
- if mobj:
- downloaded_data_len = int(float(mobj.group(1)) * 1024)
- time_now = time.time()
- speed = self.calc_speed(start, time_now, downloaded_data_len)
- self._hook_progress({
- 'downloaded_bytes': downloaded_data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'status': 'downloading',
- 'elapsed': time_now - start,
- 'speed': speed,
- })
- cursor_in_new_line = False
- elif self.params.get('verbose', False):
- if not cursor_in_new_line:
- self.to_screen('')
- cursor_in_new_line = True
- self.to_screen('[rtmpdump] ' + line)
- finally:
- proc.wait()
- if not cursor_in_new_line:
- self.to_screen('')
- return proc.returncode
-
- url = info_dict['url']
- player_url = info_dict.get('player_url')
- page_url = info_dict.get('page_url')
- app = info_dict.get('app')
- play_path = info_dict.get('play_path')
- tc_url = info_dict.get('tc_url')
- flash_version = info_dict.get('flash_version')
- live = info_dict.get('rtmp_live', False)
- conn = info_dict.get('rtmp_conn')
- protocol = info_dict.get('rtmp_protocol')
- real_time = info_dict.get('rtmp_real_time', False)
- no_resume = info_dict.get('no_resume', False)
- continue_dl = self.params.get('continuedl', True)
-
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
- test = self.params.get('test', False)
-
- # Check for rtmpdump first
- if not check_executable('rtmpdump', ['-h']):
- self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
- return False
-
- # Download using rtmpdump. rtmpdump returns exit code 2 when
- # the connection was interrupted and resuming appears to be
- # possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = [
- 'rtmpdump', '--verbose', '-r', url,
- '-o', tmpfilename]
- if player_url is not None:
- basic_args += ['--swfVfy', player_url]
- if page_url is not None:
- basic_args += ['--pageUrl', page_url]
- if app is not None:
- basic_args += ['--app', app]
- if play_path is not None:
- basic_args += ['--playpath', play_path]
- if tc_url is not None:
- basic_args += ['--tcUrl', tc_url]
- if test:
- basic_args += ['--stop', '1']
- if flash_version is not None:
- basic_args += ['--flashVer', flash_version]
- if live:
- basic_args += ['--live']
- if isinstance(conn, list):
- for entry in conn:
- basic_args += ['--conn', entry]
- elif isinstance(conn, compat_str):
- basic_args += ['--conn', conn]
- if protocol is not None:
- basic_args += ['--protocol', protocol]
- if real_time:
- basic_args += ['--realtime']
-
- args = basic_args
- if not no_resume and continue_dl and not live:
- args += ['--resume']
- if not live and continue_dl:
- args += ['--skip', '1']
-
- args = [encodeArgument(a) for a in args]
-
- self._debug_cmd(args, exe='rtmpdump')
-
- RD_SUCCESS = 0
- RD_FAILED = 1
- RD_INCOMPLETE = 2
- RD_NO_CONNECT = 3
-
- started = time.time()
-
- try:
- retval = run_rtmpdump(args)
- except KeyboardInterrupt:
- if not info_dict.get('is_live'):
- raise
- retval = RD_SUCCESS
- self.to_screen('\n[rtmpdump] Interrupted by user')
-
- if retval == RD_NO_CONNECT:
- self.report_error('[rtmpdump] Could not connect to RTMP server.')
- return False
-
- while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
- prevsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
- time.sleep(5.0) # This seems to be needed
- args = basic_args + ['--resume']
- if retval == RD_FAILED:
- args += ['--skip', '1']
- args = [encodeArgument(a) for a in args]
- retval = run_rtmpdump(args)
- cursize = os.path.getsize(encodeFilename(tmpfilename))
- if prevsize == cursize and retval == RD_FAILED:
- break
- # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
- if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
- self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
- retval = RD_SUCCESS
- break
- if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
- self.try_rename(tmpfilename, filename)
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- 'filename': filename,
- 'status': 'finished',
- 'elapsed': time.time() - started,
- })
- return True
- else:
- self.to_stderr('\n')
- self.report_error('rtmpdump exited with code %d' % retval)
- return False
diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py
deleted file mode 100644
index 939358b..0000000
--- a/youtube_dl/downloader/rtsp.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-import subprocess
-
-from .common import FileDownloader
-from ..utils import (
- check_executable,
- encodeFilename,
-)
-
-
-class RtspFD(FileDownloader):
- def real_download(self, filename, info_dict):
- url = info_dict['url']
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
-
- if check_executable('mplayer', ['-h']):
- args = [
- 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
- '-dumpstream', '-dumpfile', tmpfilename, url]
- elif check_executable('mpv', ['-h']):
- args = [
- 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
- else:
- self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
- return False
-
- self._debug_cmd(args)
-
- retval = subprocess.call(args)
- if retval == 0:
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
- self.try_rename(tmpfilename, filename)
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- 'filename': filename,
- 'status': 'finished',
- })
- return True
- else:
- self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (args[0], retval))
- return False
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
deleted file mode 100644
index d5a4418..0000000
--- a/youtube_dl/extractor/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import unicode_literals
-
-try:
- from .lazy_extractors import *
- from .lazy_extractors import _ALL_CLASSES
- _LAZY_LOADER = True
-except ImportError:
- _LAZY_LOADER = False
- from .extractors import *
-
- _ALL_CLASSES = [
- klass
- for name, klass in globals().items()
- if name.endswith('IE') and name != 'GenericIE'
- ]
- #_ALL_CLASSES.append(GenericIE)
-
-
-def gen_extractor_classes():
- """ Return a list of supported extractors.
- The order does matter; the first extractor matched is the one handling the URL.
- """
- return _ALL_CLASSES
-
-
-def gen_extractors():
- """ Return a list of an instance of every supported extractor.
- The order does matter; the first extractor matched is the one handling the URL.
- """
- return [klass() for klass in gen_extractor_classes()]
-
-
-def list_extractors(age_limit):
- """
- Return a list of extractors that are suitable for the given age,
- sorted by extractor ID.
- """
-
- return sorted(
- filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
- key=lambda ie: ie.IE_NAME.lower())
-
-
-def get_info_extractor(ie_name):
- """Returns the info extractor class with the given ie_name"""
- return globals()[ie_name + 'IE']
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
deleted file mode 100644
index b83b51e..0000000
--- a/youtube_dl/extractor/adobepass.py
+++ /dev/null
@@ -1,1567 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
-import xml.etree.ElementTree as etree
-
-from .common import InfoExtractor
-from ..compat import (
- compat_kwargs,
- compat_urlparse,
-)
-from ..utils import (
- unescapeHTML,
- urlencode_postdata,
- unified_timestamp,
- ExtractorError,
- NO_DEFAULT,
-)
-
-
-MSO_INFO = {
- 'DTV': {
- 'name': 'DIRECTV',
- 'username_field': 'username',
- 'password_field': 'password',
- },
- 'ATTOTT': {
- 'name': 'DIRECTV NOW',
- 'username_field': 'email',
- 'password_field': 'loginpassword',
- },
- 'Rogers': {
- 'name': 'Rogers',
- 'username_field': 'UserName',
- 'password_field': 'UserPassword',
- },
- 'Comcast_SSO': {
- 'name': 'Comcast XFINITY',
- 'username_field': 'user',
- 'password_field': 'passwd',
- },
- 'TWC': {
- 'name': 'Time Warner Cable | Spectrum',
- 'username_field': 'Ecom_User_ID',
- 'password_field': 'Ecom_Password',
- },
- 'Brighthouse': {
- 'name': 'Bright House Networks | Spectrum',
- 'username_field': 'j_username',
- 'password_field': 'j_password',
- },
- 'Charter_Direct': {
- 'name': 'Charter Spectrum',
- 'username_field': 'IDToken1',
- 'password_field': 'IDToken2',
- },
- 'Verizon': {
- 'name': 'Verizon FiOS',
- 'username_field': 'IDToken1',
- 'password_field': 'IDToken2',
- },
- 'thr030': {
- 'name': '3 Rivers Communications'
- },
- 'com140': {
- 'name': 'Access Montana'
- },
- 'acecommunications': {
- 'name': 'AcenTek'
- },
- 'acm010': {
- 'name': 'Acme Communications'
- },
- 'ada020': {
- 'name': 'Adams Cable Service'
- },
- 'alb020': {
- 'name': 'Albany Mutual Telephone'
- },
- 'algona': {
- 'name': 'Algona Municipal Utilities'
- },
- 'allwest': {
- 'name': 'All West Communications'
- },
- 'all025': {
- 'name': 'Allen\'s Communications'
- },
- 'spl010': {
- 'name': 'Alliance Communications'
- },
- 'all070': {
- 'name': 'ALLO Communications'
- },
- 'alpine': {
- 'name': 'Alpine Communications'
- },
- 'hun015': {
- 'name': 'American Broadband'
- },
- 'nwc010': {
- 'name': 'American Broadband Missouri'
- },
- 'com130-02': {
- 'name': 'American Community Networks'
- },
- 'com130-01': {
- 'name': 'American Warrior Networks'
- },
- 'tom020': {
- 'name': 'Amherst Telephone/Tomorrow Valley'
- },
- 'tvc020': {
- 'name': 'Andycable'
- },
- 'arkwest': {
- 'name': 'Arkwest Communications'
- },
- 'art030': {
- 'name': 'Arthur Mutual Telephone Company'
- },
- 'arvig': {
- 'name': 'Arvig'
- },
- 'nttcash010': {
- 'name': 'Ashland Home Net'
- },
- 'astound': {
- 'name': 'Astound (now Wave)'
- },
- 'dix030': {
- 'name': 'ATC Broadband'
- },
- 'ara010': {
- 'name': 'ATC Communications'
- },
- 'she030-02': {
- 'name': 'Ayersville Communications'
- },
- 'baldwin': {
- 'name': 'Baldwin Lightstream'
- },
- 'bal040': {
- 'name': 'Ballard TV'
- },
- 'cit025': {
- 'name': 'Bardstown Cable TV'
- },
- 'bay030': {
- 'name': 'Bay Country Communications'
- },
- 'tel095': {
- 'name': 'Beaver Creek Cooperative Telephone'
- },
- 'bea020': {
- 'name': 'Beaver Valley Cable'
- },
- 'bee010': {
- 'name': 'Bee Line Cable'
- },
- 'wir030': {
- 'name': 'Beehive Broadband'
- },
- 'bra020': {
- 'name': 'BELD'
- },
- 'bel020': {
- 'name': 'Bellevue Municipal Cable'
- },
- 'vol040-01': {
- 'name': 'Ben Lomand Connect / BLTV'
- },
- 'bev010': {
- 'name': 'BEVCOMM'
- },
- 'big020': {
- 'name': 'Big Sandy Broadband'
- },
- 'ble020': {
- 'name': 'Bledsoe Telephone Cooperative'
- },
- 'bvt010': {
- 'name': 'Blue Valley Tele-Communications'
- },
- 'bra050': {
- 'name': 'Brandenburg Telephone Co.'
- },
- 'bte010': {
- 'name': 'Bristol Tennessee Essential Services'
- },
- 'annearundel': {
- 'name': 'Broadstripe'
- },
- 'btc010': {
- 'name': 'BTC Communications'
- },
- 'btc040': {
- 'name': 'BTC Vision - Nahunta'
- },
- 'bul010': {
- 'name': 'Bulloch Telephone Cooperative'
- },
- 'but010': {
- 'name': 'Butler-Bremer Communications'
- },
- 'tel160-csp': {
- 'name': 'C Spire SNAP'
- },
- 'csicable': {
- 'name': 'Cable Services Inc.'
- },
- 'cableamerica': {
- 'name': 'CableAmerica'
- },
- 'cab038': {
- 'name': 'CableSouth Media 3'
- },
- 'weh010-camtel': {
- 'name': 'Cam-Tel Company'
- },
- 'car030': {
- 'name': 'Cameron Communications'
- },
- 'canbytel': {
- 'name': 'Canby Telcom'
- },
- 'crt020': {
- 'name': 'CapRock Tv'
- },
- 'car050': {
- 'name': 'Carnegie Cable'
- },
- 'cas': {
- 'name': 'CAS Cable'
- },
- 'casscomm': {
- 'name': 'CASSCOMM'
- },
- 'mid180-02': {
- 'name': 'Catalina Broadband Solutions'
- },
- 'cccomm': {
- 'name': 'CC Communications'
- },
- 'nttccde010': {
- 'name': 'CDE Lightband'
- },
- 'cfunet': {
- 'name': 'Cedar Falls Utilities'
- },
- 'dem010-01': {
- 'name': 'Celect-Bloomer Telephone Area'
- },
- 'dem010-02': {
- 'name': 'Celect-Bruce Telephone Area'
- },
- 'dem010-03': {
- 'name': 'Celect-Citizens Connected Area'
- },
- 'dem010-04': {
- 'name': 'Celect-Elmwood/Spring Valley Area'
- },
- 'dem010-06': {
- 'name': 'Celect-Mosaic Telecom'
- },
- 'dem010-05': {
- 'name': 'Celect-West WI Telephone Area'
- },
- 'net010-02': {
- 'name': 'Cellcom/Nsight Telservices'
- },
- 'cen100': {
- 'name': 'CentraCom'
- },
- 'nttccst010': {
- 'name': 'Central Scott / CSTV'
- },
- 'cha035': {
- 'name': 'Chaparral CableVision'
- },
- 'cha050': {
- 'name': 'Chariton Valley Communication Corporation, Inc.'
- },
- 'cha060': {
- 'name': 'Chatmoss Cablevision'
- },
- 'nttcche010': {
- 'name': 'Cherokee Communications'
- },
- 'che050': {
- 'name': 'Chesapeake Bay Communications'
- },
- 'cimtel': {
- 'name': 'Cim-Tel Cable, LLC.'
- },
- 'cit180': {
- 'name': 'Citizens Cablevision - Floyd, VA'
- },
- 'cit210': {
- 'name': 'Citizens Cablevision, Inc.'
- },
- 'cit040': {
- 'name': 'Citizens Fiber'
- },
- 'cit250': {
- 'name': 'Citizens Mutual'
- },
- 'war040': {
- 'name': 'Citizens Telephone Corporation'
- },
- 'wat025': {
- 'name': 'City Of Monroe'
- },
- 'wadsworth': {
- 'name': 'CityLink'
- },
- 'nor100': {
- 'name': 'CL Tel'
- },
- 'cla010': {
- 'name': 'Clarence Telephone and Cedar Communications'
- },
- 'ser060': {
- 'name': 'Clear Choice Communications'
- },
- 'tac020': {
- 'name': 'Click! Cable TV'
- },
- 'war020': {
- 'name': 'CLICK1.NET'
- },
- 'cml010': {
- 'name': 'CML Telephone Cooperative Association'
- },
- 'cns': {
- 'name': 'CNS'
- },
- 'com160': {
- 'name': 'Co-Mo Connect'
- },
- 'coa020': {
- 'name': 'Coast Communications'
- },
- 'coa030': {
- 'name': 'Coaxial Cable TV'
- },
- 'mid055': {
- 'name': 'Cobalt TV (Mid-State Community TV)'
- },
- 'col070': {
- 'name': 'Columbia Power & Water Systems'
- },
- 'col080': {
- 'name': 'Columbus Telephone'
- },
- 'nor105': {
- 'name': 'Communications 1 Cablevision, Inc.'
- },
- 'com150': {
- 'name': 'Community Cable & Broadband'
- },
- 'com020': {
- 'name': 'Community Communications Company'
- },
- 'coy010': {
- 'name': 'commZoom'
- },
- 'com025': {
- 'name': 'Complete Communication Services'
- },
- 'cat020': {
- 'name': 'Comporium'
- },
- 'com071': {
- 'name': 'ComSouth Telesys'
- },
- 'consolidatedcable': {
- 'name': 'Consolidated'
- },
- 'conwaycorp': {
- 'name': 'Conway Corporation'
- },
- 'coo050': {
- 'name': 'Coon Valley Telecommunications Inc'
- },
- 'coo080': {
- 'name': 'Cooperative Telephone Company'
- },
- 'cpt010': {
- 'name': 'CP-TEL'
- },
- 'cra010': {
- 'name': 'Craw-Kan Telephone'
- },
- 'crestview': {
- 'name': 'Crestview Cable Communications'
- },
- 'cross': {
- 'name': 'Cross TV'
- },
- 'cro030': {
- 'name': 'Crosslake Communications'
- },
- 'ctc040': {
- 'name': 'CTC - Brainerd MN'
- },
- 'phe030': {
- 'name': 'CTV-Beam - East Alabama'
- },
- 'cun010': {
- 'name': 'Cunningham Telephone & Cable'
- },
- 'dpc010': {
- 'name': 'D & P Communications'
- },
- 'dak030': {
- 'name': 'Dakota Central Telecommunications'
- },
- 'nttcdel010': {
- 'name': 'Delcambre Telephone LLC'
- },
- 'tel160-del': {
- 'name': 'Delta Telephone Company'
- },
- 'sal040': {
- 'name': 'DiamondNet'
- },
- 'ind060-dc': {
- 'name': 'Direct Communications'
- },
- 'doy010': {
- 'name': 'Doylestown Cable TV'
- },
- 'dic010': {
- 'name': 'DRN'
- },
- 'dtc020': {
- 'name': 'DTC'
- },
- 'dtc010': {
- 'name': 'DTC Cable (Delhi)'
- },
- 'dum010': {
- 'name': 'Dumont Telephone Company'
- },
- 'dun010': {
- 'name': 'Dunkerton Telephone Cooperative'
- },
- 'cci010': {
- 'name': 'Duo County Telecom'
- },
- 'eagle': {
- 'name': 'Eagle Communications'
- },
- 'weh010-east': {
- 'name': 'East Arkansas Cable TV'
- },
- 'eatel': {
- 'name': 'EATEL Video, LLC'
- },
- 'ell010': {
- 'name': 'ECTA'
- },
- 'emerytelcom': {
- 'name': 'Emery Telcom Video LLC'
- },
- 'nor200': {
- 'name': 'Empire Access'
- },
- 'endeavor': {
- 'name': 'Endeavor Communications'
- },
- 'sun045': {
- 'name': 'Enhanced Telecommunications Corporation'
- },
- 'mid030': {
- 'name': 'enTouch'
- },
- 'epb020': {
- 'name': 'EPB Smartnet'
- },
- 'jea010': {
- 'name': 'EPlus Broadband'
- },
- 'com065': {
- 'name': 'ETC'
- },
- 'ete010': {
- 'name': 'Etex Communications'
- },
- 'fbc-tele': {
- 'name': 'F&B Communications'
- },
- 'fal010': {
- 'name': 'Falcon Broadband'
- },
- 'fam010': {
- 'name': 'FamilyView CableVision'
- },
- 'far020': {
- 'name': 'Farmers Mutual Telephone Company'
- },
- 'fay010': {
- 'name': 'Fayetteville Public Utilities'
- },
- 'sal060': {
- 'name': 'fibrant'
- },
- 'fid010': {
- 'name': 'Fidelity Communications'
- },
- 'for030': {
- 'name': 'FJ Communications'
- },
- 'fli020': {
- 'name': 'Flint River Communications'
- },
- 'far030': {
- 'name': 'FMT - Jesup'
- },
- 'foo010': {
- 'name': 'Foothills Communications'
- },
- 'for080': {
- 'name': 'Forsyth CableNet'
- },
- 'fbcomm': {
- 'name': 'Frankfort Plant Board'
- },
- 'tel160-fra': {
- 'name': 'Franklin Telephone Company'
- },
- 'nttcftc010': {
- 'name': 'FTC'
- },
- 'fullchannel': {
- 'name': 'Full Channel, Inc.'
- },
- 'gar040': {
- 'name': 'Gardonville Cooperative Telephone Association'
- },
- 'gbt010': {
- 'name': 'GBT Communications, Inc.'
- },
- 'tec010': {
- 'name': 'Genuine Telecom'
- },
- 'clr010': {
- 'name': 'Giant Communications'
- },
- 'gla010': {
- 'name': 'Glasgow EPB'
- },
- 'gle010': {
- 'name': 'Glenwood Telecommunications'
- },
- 'gra060': {
- 'name': 'GLW Broadband Inc.'
- },
- 'goldenwest': {
- 'name': 'Golden West Cablevision'
- },
- 'vis030': {
- 'name': 'Grantsburg Telcom'
- },
- 'gpcom': {
- 'name': 'Great Plains Communications'
- },
- 'gri010': {
- 'name': 'Gridley Cable Inc'
- },
- 'hbc010': {
- 'name': 'H&B Cable Services'
- },
- 'hae010': {
- 'name': 'Haefele TV Inc.'
- },
- 'htc010': {
- 'name': 'Halstad Telephone Company'
- },
- 'har005': {
- 'name': 'Harlan Municipal Utilities'
- },
- 'har020': {
- 'name': 'Hart Communications'
- },
- 'ced010': {
- 'name': 'Hartelco TV'
- },
- 'hea040': {
- 'name': 'Heart of Iowa Communications Cooperative'
- },
- 'htc020': {
- 'name': 'Hickory Telephone Company'
- },
- 'nttchig010': {
- 'name': 'Highland Communication Services'
- },
- 'hig030': {
- 'name': 'Highland Media'
- },
- 'spc010': {
- 'name': 'Hilliary Communications'
- },
- 'hin020': {
- 'name': 'Hinton CATV Co.'
- },
- 'hometel': {
- 'name': 'HomeTel Entertainment, Inc.'
- },
- 'hoodcanal': {
- 'name': 'Hood Canal Communications'
- },
- 'weh010-hope': {
- 'name': 'Hope - Prescott Cable TV'
- },
- 'horizoncable': {
- 'name': 'Horizon Cable TV, Inc.'
- },
- 'hor040': {
- 'name': 'Horizon Chillicothe Telephone'
- },
- 'htc030': {
- 'name': 'HTC Communications Co. - IL'
- },
- 'htccomm': {
- 'name': 'HTC Communications, Inc. - IA'
- },
- 'wal005': {
- 'name': 'Huxley Communications'
- },
- 'imon': {
- 'name': 'ImOn Communications'
- },
- 'ind040': {
- 'name': 'Independence Telecommunications'
- },
- 'rrc010': {
- 'name': 'Inland Networks'
- },
- 'stc020': {
- 'name': 'Innovative Cable TV St Croix'
- },
- 'car100': {
- 'name': 'Innovative Cable TV St Thomas-St John'
- },
- 'icc010': {
- 'name': 'Inside Connect Cable'
- },
- 'int100': {
- 'name': 'Integra Telecom'
- },
- 'int050': {
- 'name': 'Interstate Telecommunications Coop'
- },
- 'irv010': {
- 'name': 'Irvine Cable'
- },
- 'k2c010': {
- 'name': 'K2 Communications'
- },
- 'kal010': {
- 'name': 'Kalida Telephone Company, Inc.'
- },
- 'kal030': {
- 'name': 'Kalona Cooperative Telephone Company'
- },
- 'kmt010': {
- 'name': 'KMTelecom'
- },
- 'kpu010': {
- 'name': 'KPU Telecommunications'
- },
- 'kuh010': {
- 'name': 'Kuhn Communications, Inc.'
- },
- 'lak130': {
- 'name': 'Lakeland Communications'
- },
- 'lan010': {
- 'name': 'Langco'
- },
- 'lau020': {
- 'name': 'Laurel Highland Total Communications, Inc.'
- },
- 'leh010': {
- 'name': 'Lehigh Valley Cooperative Telephone'
- },
- 'bra010': {
- 'name': 'Limestone Cable/Bracken Cable'
- },
- 'loc020': {
- 'name': 'LISCO'
- },
- 'lit020': {
- 'name': 'Litestream'
- },
- 'tel140': {
- 'name': 'LivCom'
- },
- 'loc010': {
- 'name': 'LocalTel Communications'
- },
- 'weh010-longview': {
- 'name': 'Longview - Kilgore Cable TV'
- },
- 'lon030': {
- 'name': 'Lonsdale Video Ventures, LLC'
- },
- 'lns010': {
- 'name': 'Lost Nation-Elwood Telephone Co.'
- },
- 'nttclpc010': {
- 'name': 'LPC Connect'
- },
- 'lumos': {
- 'name': 'Lumos Networks'
- },
- 'madison': {
- 'name': 'Madison Communications'
- },
- 'mad030': {
- 'name': 'Madison County Cable Inc.'
- },
- 'nttcmah010': {
- 'name': 'Mahaska Communication Group'
- },
- 'mar010': {
- 'name': 'Marne & Elk Horn Telephone Company'
- },
- 'mcc040': {
- 'name': 'McClure Telephone Co.'
- },
- 'mctv': {
- 'name': 'MCTV'
- },
- 'merrimac': {
- 'name': 'Merrimac Communications Ltd.'
- },
- 'metronet': {
- 'name': 'Metronet'
- },
- 'mhtc': {
- 'name': 'MHTC'
- },
- 'midhudson': {
- 'name': 'Mid-Hudson Cable'
- },
- 'midrivers': {
- 'name': 'Mid-Rivers Communications'
- },
- 'mid045': {
- 'name': 'Midstate Communications'
- },
- 'mil080': {
- 'name': 'Milford Communications'
- },
- 'min030': {
- 'name': 'MINET'
- },
- 'nttcmin010': {
- 'name': 'Minford TV'
- },
- 'san040-02': {
- 'name': 'Mitchell Telecom'
- },
- 'mlg010': {
- 'name': 'MLGC'
- },
- 'mon060': {
- 'name': 'Mon-Cre TVE'
- },
- 'mou110': {
- 'name': 'Mountain Telephone'
- },
- 'mou050': {
- 'name': 'Mountain Village Cable'
- },
- 'mtacomm': {
- 'name': 'MTA Communications, LLC'
- },
- 'mtc010': {
- 'name': 'MTC Cable'
- },
- 'med040': {
- 'name': 'MTC Technologies'
- },
- 'man060': {
- 'name': 'MTCC'
- },
- 'mtc030': {
- 'name': 'MTCO Communications'
- },
- 'mul050': {
- 'name': 'Mulberry Telecommunications'
- },
- 'mur010': {
- 'name': 'Murray Electric System'
- },
- 'musfiber': {
- 'name': 'MUS FiberNET'
- },
- 'mpw': {
- 'name': 'Muscatine Power & Water'
- },
- 'nttcsli010': {
- 'name': 'myEVTV.com'
- },
- 'nor115': {
- 'name': 'NCC'
- },
- 'nor260': {
- 'name': 'NDTC'
- },
- 'nctc': {
- 'name': 'Nebraska Central Telecom, Inc.'
- },
- 'nel020': {
- 'name': 'Nelsonville TV Cable'
- },
- 'nem010': {
- 'name': 'Nemont'
- },
- 'new075': {
- 'name': 'New Hope Telephone Cooperative'
- },
- 'nor240': {
- 'name': 'NICP'
- },
- 'cic010': {
- 'name': 'NineStar Connect'
- },
- 'nktelco': {
- 'name': 'NKTelco'
- },
- 'nortex': {
- 'name': 'Nortex Communications'
- },
- 'nor140': {
- 'name': 'North Central Telephone Cooperative'
- },
- 'nor030': {
- 'name': 'Northland Communications'
- },
- 'nor075': {
- 'name': 'Northwest Communications'
- },
- 'nor125': {
- 'name': 'Norwood Light Broadband'
- },
- 'net010': {
- 'name': 'Nsight Telservices'
- },
- 'dur010': {
- 'name': 'Ntec'
- },
- 'nts010': {
- 'name': 'NTS Communications'
- },
- 'new045': {
- 'name': 'NU-Telecom'
- },
- 'nulink': {
- 'name': 'NuLink'
- },
- 'jam030': {
- 'name': 'NVC'
- },
- 'far035': {
- 'name': 'OmniTel Communications'
- },
- 'onesource': {
- 'name': 'OneSource Communications'
- },
- 'cit230': {
- 'name': 'Opelika Power Services'
- },
- 'daltonutilities': {
- 'name': 'OptiLink'
- },
- 'mid140': {
- 'name': 'OPTURA'
- },
- 'ote010': {
- 'name': 'OTEC Communication Company'
- },
- 'cci020': {
- 'name': 'Packerland Broadband'
- },
- 'pan010': {
- 'name': 'Panora Telco/Guthrie Center Communications'
- },
- 'otter': {
- 'name': 'Park Region Telephone & Otter Tail Telcom'
- },
- 'mid050': {
- 'name': 'Partner Communications Cooperative'
- },
- 'fib010': {
- 'name': 'Pathway'
- },
- 'paulbunyan': {
- 'name': 'Paul Bunyan Communications'
- },
- 'pem020': {
- 'name': 'Pembroke Telephone Company'
- },
- 'mck010': {
- 'name': 'Peoples Rural Telephone Cooperative'
- },
- 'pul010': {
- 'name': 'PES Energize'
- },
- 'phi010': {
- 'name': 'Philippi Communications System'
- },
- 'phonoscope': {
- 'name': 'Phonoscope Cable'
- },
- 'pin070': {
- 'name': 'Pine Belt Communications, Inc.'
- },
- 'weh010-pine': {
- 'name': 'Pine Bluff Cable TV'
- },
- 'pin060': {
- 'name': 'Pineland Telephone Cooperative'
- },
- 'cam010': {
- 'name': 'Pinpoint Communications'
- },
- 'pio060': {
- 'name': 'Pioneer Broadband'
- },
- 'pioncomm': {
- 'name': 'Pioneer Communications'
- },
- 'pioneer': {
- 'name': 'Pioneer DTV'
- },
- 'pla020': {
- 'name': 'Plant TiftNet, Inc.'
- },
- 'par010': {
- 'name': 'PLWC'
- },
- 'pro035': {
- 'name': 'PMT'
- },
- 'vik011': {
- 'name': 'Polar Cablevision'
- },
- 'pottawatomie': {
- 'name': 'Pottawatomie Telephone Co.'
- },
- 'premiercomm': {
- 'name': 'Premier Communications'
- },
- 'psc010': {
- 'name': 'PSC'
- },
- 'pan020': {
- 'name': 'PTCI'
- },
- 'qco010': {
- 'name': 'QCOL'
- },
- 'qua010': {
- 'name': 'Quality Cablevision'
- },
- 'rad010': {
- 'name': 'Radcliffe Telephone Company'
- },
- 'car040': {
- 'name': 'Rainbow Communications'
- },
- 'rai030': {
- 'name': 'Rainier Connect'
- },
- 'ral010': {
- 'name': 'Ralls Technologies'
- },
- 'rct010': {
- 'name': 'RC Technologies'
- },
- 'red040': {
- 'name': 'Red River Communications'
- },
- 'ree010': {
- 'name': 'Reedsburg Utility Commission'
- },
- 'mol010': {
- 'name': 'Reliance Connects- Oregon'
- },
- 'res020': {
- 'name': 'Reserve Telecommunications'
- },
- 'weh010-resort': {
- 'name': 'Resort TV Cable'
- },
- 'rld010': {
- 'name': 'Richland Grant Telephone Cooperative, Inc.'
- },
- 'riv030': {
- 'name': 'River Valley Telecommunications Coop'
- },
- 'rockportcable': {
- 'name': 'Rock Port Cablevision'
- },
- 'rsf010': {
- 'name': 'RS Fiber'
- },
- 'rtc': {
- 'name': 'RTC Communication Corp'
- },
- 'res040': {
- 'name': 'RTC-Reservation Telephone Coop.'
- },
- 'rte010': {
- 'name': 'RTEC Communications'
- },
- 'stc010': {
- 'name': 'S&T'
- },
- 'san020': {
- 'name': 'San Bruno Cable TV'
- },
- 'san040-01': {
- 'name': 'Santel'
- },
- 'sav010': {
- 'name': 'SCI Broadband-Savage Communications Inc.'
- },
- 'sco050': {
- 'name': 'Scottsboro Electric Power Board'
- },
- 'scr010': {
- 'name': 'Scranton Telephone Company'
- },
- 'selco': {
- 'name': 'SELCO'
- },
- 'she010': {
- 'name': 'Shentel'
- },
- 'she030': {
- 'name': 'Sherwood Mutual Telephone Association, Inc.'
- },
- 'ind060-ssc': {
- 'name': 'Silver Star Communications'
- },
- 'sjoberg': {
- 'name': 'Sjoberg\'s Inc.'
- },
- 'sou025': {
- 'name': 'SKT'
- },
- 'sky050': {
- 'name': 'SkyBest TV'
- },
- 'nttcsmi010': {
- 'name': 'Smithville Communications'
- },
- 'woo010': {
- 'name': 'Solarus'
- },
- 'sou075': {
- 'name': 'South Central Rural Telephone Cooperative'
- },
- 'sou065': {
- 'name': 'South Holt Cablevision, Inc.'
- },
- 'sou035': {
- 'name': 'South Slope Cooperative Communications'
- },
- 'spa020': {
- 'name': 'Spanish Fork Community Network'
- },
- 'spe010': {
- 'name': 'Spencer Municipal Utilities'
- },
- 'spi005': {
- 'name': 'Spillway Communications, Inc.'
- },
- 'srt010': {
- 'name': 'SRT'
- },
- 'cccsmc010': {
- 'name': 'St. Maarten Cable TV'
- },
- 'sta025': {
- 'name': 'Star Communications'
- },
- 'sco020': {
- 'name': 'STE'
- },
- 'uin010': {
- 'name': 'STRATA Networks'
- },
- 'sum010': {
- 'name': 'Sumner Cable TV'
- },
- 'pie010': {
- 'name': 'Surry TV/PCSI TV'
- },
- 'swa010': {
- 'name': 'Swayzee Communications'
- },
- 'sweetwater': {
- 'name': 'Sweetwater Cable Television Co'
- },
- 'weh010-talequah': {
- 'name': 'Tahlequah Cable TV'
- },
- 'tct': {
- 'name': 'TCT'
- },
- 'tel050': {
- 'name': 'Tele-Media Company'
- },
- 'com050': {
- 'name': 'The Community Agency'
- },
- 'thr020': {
- 'name': 'Three River'
- },
- 'cab140': {
- 'name': 'Town & Country Technologies'
- },
- 'tra010': {
- 'name': 'Trans-Video'
- },
- 'tre010': {
- 'name': 'Trenton TV Cable Company'
- },
- 'tcc': {
- 'name': 'Tri County Communications Cooperative'
- },
- 'tri025': {
- 'name': 'TriCounty Telecom'
- },
- 'tri110': {
- 'name': 'TrioTel Communications, Inc.'
- },
- 'tro010': {
- 'name': 'Troy Cablevision, Inc.'
- },
- 'tsc': {
- 'name': 'TSC'
- },
- 'cit220': {
- 'name': 'Tullahoma Utilities Board'
- },
- 'tvc030': {
- 'name': 'TV Cable of Rensselaer'
- },
- 'tvc015': {
- 'name': 'TVC Cable'
- },
- 'cab180': {
- 'name': 'TVision'
- },
- 'twi040': {
- 'name': 'Twin Lakes'
- },
- 'tvtinc': {
- 'name': 'Twin Valley'
- },
- 'uis010': {
- 'name': 'Union Telephone Company'
- },
- 'uni110': {
- 'name': 'United Communications - TN'
- },
- 'uni120': {
- 'name': 'United Services'
- },
- 'uss020': {
- 'name': 'US Sonet'
- },
- 'cab060': {
- 'name': 'USA Communications'
- },
- 'she005': {
- 'name': 'USA Communications/Shellsburg, IA'
- },
- 'val040': {
- 'name': 'Valley TeleCom Group'
- },
- 'val025': {
- 'name': 'Valley Telecommunications'
- },
- 'val030': {
- 'name': 'Valparaiso Broadband'
- },
- 'cla050': {
- 'name': 'Vast Broadband'
- },
- 'sul015': {
- 'name': 'Venture Communications Cooperative, Inc.'
- },
- 'ver025': {
- 'name': 'Vernon Communications Co-op'
- },
- 'weh010-vicksburg': {
- 'name': 'Vicksburg Video'
- },
- 'vis070': {
- 'name': 'Vision Communications'
- },
- 'volcanotel': {
- 'name': 'Volcano Vision, Inc.'
- },
- 'vol040-02': {
- 'name': 'VolFirst / BLTV'
- },
- 'ver070': {
- 'name': 'VTel'
- },
- 'nttcvtx010': {
- 'name': 'VTX1'
- },
- 'bci010-02': {
- 'name': 'Vyve Broadband'
- },
- 'wab020': {
- 'name': 'Wabash Mutual Telephone'
- },
- 'waitsfield': {
- 'name': 'Waitsfield Cable'
- },
- 'wal010': {
- 'name': 'Walnut Communications'
- },
- 'wavebroadband': {
- 'name': 'Wave'
- },
- 'wav030': {
- 'name': 'Waverly Communications Utility'
- },
- 'wbi010': {
- 'name': 'WBI'
- },
- 'web020': {
- 'name': 'Webster-Calhoun Cooperative Telephone Association'
- },
- 'wes005': {
- 'name': 'West Alabama TV Cable'
- },
- 'carolinata': {
- 'name': 'West Carolina Communications'
- },
- 'wct010': {
- 'name': 'West Central Telephone Association'
- },
- 'wes110': {
- 'name': 'West River Cooperative Telephone Company'
- },
- 'ani030': {
- 'name': 'WesTel Systems'
- },
- 'westianet': {
- 'name': 'Western Iowa Networks'
- },
- 'nttcwhi010': {
- 'name': 'Whidbey Telecom'
- },
- 'weh010-white': {
- 'name': 'White County Cable TV'
- },
- 'wes130': {
- 'name': 'Wiatel'
- },
- 'wik010': {
- 'name': 'Wiktel'
- },
- 'wil070': {
- 'name': 'Wilkes Communications, Inc./RiverStreet Networks'
- },
- 'wil015': {
- 'name': 'Wilson Communications'
- },
- 'win010': {
- 'name': 'Windomnet/SMBS'
- },
- 'win090': {
- 'name': 'Windstream Cable TV'
- },
- 'wcta': {
- 'name': 'Winnebago Cooperative Telecom Association'
- },
- 'wtc010': {
- 'name': 'WTC'
- },
- 'wil040': {
- 'name': 'WTC Communications, Inc.'
- },
- 'wya010': {
- 'name': 'Wyandotte Cable'
- },
- 'hin020-02': {
- 'name': 'X-Stream Services'
- },
- 'xit010': {
- 'name': 'XIT Communications'
- },
- 'yel010': {
- 'name': 'Yelcot Communications'
- },
- 'mid180-01': {
- 'name': 'yondoo'
- },
- 'cou060': {
- 'name': 'Zito Media'
- },
-}
-
-
-class AdobePassIE(InfoExtractor):
- _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
- _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
- _MVPD_CACHE = 'ap-mvpd'
-
- _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
-
- def _download_webpage_handle(self, *args, **kwargs):
- headers = kwargs.get('headers', {})
- headers.update(self.geo_verification_headers())
- kwargs['headers'] = headers
- return super(AdobePassIE, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
-
- @staticmethod
- def _get_mvpd_resource(provider_id, title, guid, rating):
- channel = etree.Element('channel')
- channel_title = etree.SubElement(channel, 'title')
- channel_title.text = provider_id
- item = etree.SubElement(channel, 'item')
- resource_title = etree.SubElement(item, 'title')
- resource_title.text = title
- resource_guid = etree.SubElement(item, 'guid')
- resource_guid.text = guid
- resource_rating = etree.SubElement(item, 'media:rating')
- resource_rating.attrib = {'scheme': 'urn:v-chip'}
- resource_rating.text = rating
- return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
-
- def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
- def xml_text(xml_str, tag):
- return self._search_regex(
- '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
-
- def is_expired(token, date_ele):
- token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
- return token_expires and token_expires <= int(time.time())
-
- def post_form(form_page_res, note, data={}):
- form_page, urlh = form_page_res
- post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
- if not re.match(r'https?://', post_url):
- post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
- form_data = self._hidden_inputs(form_page)
- form_data.update(data)
- return self._download_webpage_handle(
- post_url, video_id, note, data=urlencode_postdata(form_data), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
-
- def raise_mvpd_required():
- raise ExtractorError(
- 'This video is only available for users of participating TV providers. '
- 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
- 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
-
- def extract_redirect_url(html, url=None, fatal=False):
- # TODO: eliminate code duplication with generic extractor and move
- # redirection code into _download_webpage_handle
- REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
- redirect_url = self._search_regex(
- r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
- html, 'meta refresh redirect',
- default=NO_DEFAULT if fatal else None, fatal=fatal)
- if not redirect_url:
- return None
- if url:
- redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
- return redirect_url
-
- mvpd_headers = {
- 'ap_42': 'anonymous',
- 'ap_11': 'Linux i686',
- 'ap_z': self._USER_AGENT,
- 'User-Agent': self._USER_AGENT,
- }
-
- guid = xml_text(resource, 'guid') if '<' in resource else resource
- count = 0
- while count < 2:
- requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
- authn_token = requestor_info.get('authn_token')
- if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
- authn_token = None
- if not authn_token:
- # TODO add support for other TV Providers
- mso_id = self._downloader.params.get('ap_mso')
- if not mso_id:
- raise_mvpd_required()
- username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
- if not username or not password:
- raise_mvpd_required()
- mso_info = MSO_INFO[mso_id]
-
- provider_redirect_page_res = self._download_webpage_handle(
- self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
- 'Downloading Provider Redirect Page', query={
- 'noflash': 'true',
- 'mso_id': mso_id,
- 'requestor_id': requestor_id,
- 'no_iframe': 'false',
- 'domain_name': 'adobe.com',
- 'redirect_url': url,
- })
-
- if mso_id == 'Comcast_SSO':
- # Comcast page flow varies by video site and whether you
- # are on Comcast's network.
- provider_redirect_page, urlh = provider_redirect_page_res
- if 'automatically signing you in' in provider_redirect_page:
- oauth_redirect_url = self._html_search_regex(
- r'window\.location\s*=\s*[\'"]([^\'"]+)',
- provider_redirect_page, 'oauth redirect')
- self._download_webpage(
- oauth_redirect_url, video_id, 'Confirming auto login')
- else:
- if '<form name="signin"' in provider_redirect_page:
- provider_login_page_res = provider_redirect_page_res
- elif 'http-equiv="refresh"' in provider_redirect_page:
- oauth_redirect_url = extract_redirect_url(
- provider_redirect_page, fatal=True)
- provider_login_page_res = self._download_webpage_handle(
- oauth_redirect_url, video_id,
- self._DOWNLOADING_LOGIN_PAGE)
- else:
- provider_login_page_res = post_form(
- provider_redirect_page_res,
- self._DOWNLOADING_LOGIN_PAGE)
-
- mvpd_confirm_page_res = post_form(
- provider_login_page_res, 'Logging in', {
- mso_info['username_field']: username,
- mso_info['password_field']: password,
- })
- mvpd_confirm_page, urlh = mvpd_confirm_page_res
- if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
- post_form(mvpd_confirm_page_res, 'Confirming Login')
- elif mso_id == 'Verizon':
- # In general, if you're connecting from a Verizon-assigned IP,
- # you will not actually pass your credentials.
- provider_redirect_page, urlh = provider_redirect_page_res
- if 'Please wait ...' in provider_redirect_page:
- saml_redirect_url = self._html_search_regex(
- r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
- provider_redirect_page,
- 'SAML Redirect URL', group='url')
- saml_login_page = self._download_webpage(
- saml_redirect_url, video_id,
- 'Downloading SAML Login Page')
- else:
- saml_login_page_res = post_form(
- provider_redirect_page_res, 'Logging in', {
- mso_info['username_field']: username,
- mso_info['password_field']: password,
- })
- saml_login_page, urlh = saml_login_page_res
- if 'Please try again.' in saml_login_page:
- raise ExtractorError(
- 'We\'re sorry, but either the User ID or Password entered is not correct.')
- saml_login_url = self._search_regex(
- r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
- saml_login_page, 'SAML Login URL', group='url')
- saml_response_json = self._download_json(
- saml_login_url, video_id, 'Downloading SAML Response',
- headers={'Content-Type': 'text/xml'})
- self._download_webpage(
- saml_response_json['targetValue'], video_id,
- 'Confirming Login', data=urlencode_postdata({
- 'SAMLResponse': saml_response_json['SAMLResponse'],
- 'RelayState': saml_response_json['RelayState']
- }), headers={
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
- else:
- # Some providers (e.g. DIRECTV NOW) have another meta refresh
- # based redirect that should be followed.
- provider_redirect_page, urlh = provider_redirect_page_res
- provider_refresh_redirect_url = extract_redirect_url(
- provider_redirect_page, url=urlh.geturl())
- if provider_refresh_redirect_url:
- provider_redirect_page_res = self._download_webpage_handle(
- provider_refresh_redirect_url, video_id,
- 'Downloading Provider Redirect Page (meta refresh)')
- provider_login_page_res = post_form(
- provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
- mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
- mso_info.get('username_field', 'username'): username,
- mso_info.get('password_field', 'password'): password,
- })
- if mso_id != 'Rogers':
- post_form(mvpd_confirm_page_res, 'Confirming Login')
-
- session = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
- 'Retrieving Session', data=urlencode_postdata({
- '_method': 'GET',
- 'requestor_id': requestor_id,
- }), headers=mvpd_headers)
- if '<pendingLogout' in session:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
- count += 1
- continue
- authn_token = unescapeHTML(xml_text(session, 'authnToken'))
- requestor_info['authn_token'] = authn_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
-
- authz_token = requestor_info.get(guid)
- if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
- authz_token = None
- if not authz_token:
- authorize = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
- 'Retrieving Authorization Token', data=urlencode_postdata({
- 'resource_id': resource,
- 'requestor_id': requestor_id,
- 'authentication_token': authn_token,
- 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
- 'userMeta': '1',
- }), headers=mvpd_headers)
- if '<pendingLogout' in authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
- count += 1
- continue
- if '<error' in authorize:
- raise ExtractorError(xml_text(authorize, 'details'), expected=True)
- authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
- requestor_info[guid] = authz_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
-
- mvpd_headers.update({
- 'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
- 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
- })
-
- short_authorize = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
- video_id, 'Retrieving Media Token', data=urlencode_postdata({
- 'authz_token': authz_token,
- 'requestor_id': requestor_id,
- 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
- 'hashed_guid': 'false',
- }), headers=mvpd_headers)
- if '<pendingLogout' in short_authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
- count += 1
- continue
- return short_authorize
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
deleted file mode 100644
index 5d4db54..0000000
--- a/youtube_dl/extractor/common.py
+++ /dev/null
@@ -1,2862 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import datetime
-import hashlib
-import json
-import netrc
-import os
-import random
-import re
-import socket
-import sys
-import time
-import math
-
-from ..compat import (
- compat_cookiejar,
- compat_cookies,
- compat_etree_fromstring,
- compat_getpass,
- compat_integer_types,
- compat_http_client,
- compat_os_name,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
- compat_xml_parse_error,
-)
-from ..downloader.f4m import (
- get_base_url,
- remove_encrypted_media,
-)
-from ..utils import (
- NO_DEFAULT,
- age_restricted,
- base_url,
- bug_reports_message,
- clean_html,
- compiled_regex_type,
- determine_ext,
- determine_protocol,
- error_to_compat_str,
- ExtractorError,
- extract_attributes,
- fix_xml_ampersands,
- float_or_none,
- GeoRestrictedError,
- GeoUtils,
- int_or_none,
- js_to_json,
- JSON_LD_RE,
- mimetype2ext,
- orderedSet,
- parse_codecs,
- parse_duration,
- parse_iso8601,
- parse_m3u8_attributes,
- RegexNotFoundError,
- sanitized_Request,
- sanitize_filename,
- unescapeHTML,
- unified_strdate,
- unified_timestamp,
- update_Request,
- update_url_query,
- urljoin,
- url_basename,
- xpath_element,
- xpath_text,
- xpath_with_ns,
-)
-
-
-class InfoExtractor(object):
- """Information Extractor class.
-
- Information extractors are the classes that, given a URL, extract
- information about the video (or videos) the URL refers to. This
- information includes the real video URL, the video title, author and
- others. The information is stored in a dictionary which is then
- passed to the YoutubeDL. The YoutubeDL processes this
- information possibly downloading the video to the file system, among
- other possible outcomes.
-
- The type field determines the type of the result.
- By far the most common value (and the default if _type is missing) is
- "video", which indicates a single video.
-
- For a video, the dictionaries must include the following fields:
-
- id: Video identifier.
- title: Video title, unescaped.
-
- Additionally, it must contain either a formats entry or a url one:
-
- formats: A list of dictionaries for each format available, ordered
- from worst to best quality.
-
- Potential fields:
- * url Mandatory. The URL of the video file
- * manifest_url
- The URL of the manifest file in case of
- fragmented media (DASH, hls, hds)
- * ext Will be calculated from URL if missing
- * format A human-readable description of the format
- ("mp4 container with h264/opus").
- Calculated from the format_id, width, height.
- and format_note fields if missing.
- * format_id A short description of the format
- ("mp4_h264_opus" or "19").
- Technically optional, but strongly recommended.
- * format_note Additional info about the format
- ("3D" or "DASH video")
- * width Width of the video, if known
- * height Height of the video, if known
- * resolution Textual description of width and height
- * tbr Average bitrate of audio and video in KBit/s
- * abr Average audio bitrate in KBit/s
- * acodec Name of the audio codec in use
- * asr Audio sampling rate in Hertz
- * vbr Average video bitrate in KBit/s
- * fps Frame rate
- * vcodec Name of the video codec in use
- * container Name of the container format
- * filesize The number of bytes, if known in advance
- * filesize_approx An estimate for the number of bytes
- * player_url SWF Player URL (used for rtmpdump).
- * protocol The protocol that will be used for the actual
- download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmpe",
- "m3u8", "m3u8_native" or "http_dash_segments".
- * fragment_base_url
- Base URL for fragments. Each fragment's path
- value (if present) will be relative to
- this URL.
- * fragments A list of fragments of a fragmented media.
- Each fragment entry must contain either an url
- or a path. If an url is present it should be
- considered by a client. Otherwise both path and
- fragment_base_url must be present. Here is
- the list of all potential fields:
- * "url" - fragment's URL
- * "path" - fragment's path relative to
- fragment_base_url
- * "duration" (optional, int or float)
- * "filesize" (optional, int)
- * preference Order number of this format. If this field is
- present and not None, the formats get sorted
- by this field, regardless of all other values.
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- < -1000 to hide the format (if there is
- another one which is strictly better)
- * language Language code, e.g. "de" or "en-US".
- * language_preference Is this in the language mentioned in
- the URL?
- 10 if it's what the URL is about,
- -1 for default (don't know),
- -10 otherwise, other values reserved for now.
- * quality Order number of the video quality of this
- format, irrespective of the file format.
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- * source_preference Order number for this video source
- (quality takes higher priority)
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- * http_headers A dictionary of additional HTTP headers
- to add to the request.
- * stretched_ratio If given and not 1, indicates that the
- video's pixels are not square.
- width : height ratio as float.
- * no_resume The server does not support resuming the
- (HTTP or RTMP) download. Boolean.
- * downloader_options A dictionary of downloader options as
- described in FileDownloader
-
- url: Final video URL.
- ext: Video filename extension.
- format: The video format, defaults to ext (used for --get-format)
- player_url: SWF Player URL (used for rtmpdump).
-
- The following fields are optional:
-
- alt_title: A secondary title of the video.
- display_id An alternative identifier for the video, not necessarily
- unique, but available before title. Typically, id is
- something like "4234987", title "Dancing naked mole rats",
- and display_id "dancing-naked-mole-rats"
- thumbnails: A list of dictionaries, with the following entries:
- * "id" (optional, string) - Thumbnail format ID
- * "url"
- * "preference" (optional, int) - quality of the image
- * "width" (optional, int)
- * "height" (optional, int)
- * "resolution" (optional, string "{width}x{height"},
- deprecated)
- * "filesize" (optional, int)
- thumbnail: Full URL to a video thumbnail image.
- description: Full video description.
- uploader: Full name of the video uploader.
- license: License name the video is licensed under.
- creator: The creator of the video.
- release_date: The date (YYYYMMDD) when the video was released.
- timestamp: UNIX timestamp of the moment the video became available.
- upload_date: Video upload date (YYYYMMDD).
- If not explicitly set, calculated from timestamp.
- uploader_id: Nickname or id of the video uploader.
- uploader_url: Full URL to a personal webpage of the video uploader.
- location: Physical location where the video was filmed.
- subtitles: The available subtitles as a dictionary in the format
- {tag: subformats}. "tag" is usually a language code, and
- "subformats" is a list sorted from lower to higher
- preference, each element is a dictionary with the "ext"
- entry and one of:
- * "data": The subtitles file contents
- * "url": A URL pointing to the subtitles file
- "ext" will be calculated from URL if missing
- automatic_captions: Like 'subtitles', used by the YoutubeIE for
- automatically generated captions
- duration: Length of the video in seconds, as an integer or float.
- view_count: How many users have watched the video on the platform.
- like_count: Number of positive ratings of the video
- dislike_count: Number of negative ratings of the video
- repost_count: Number of reposts of the video
- average_rating: Average rating give by users, the scale used depends on the webpage
- comment_count: Number of comments on the video
- comments: A list of comments, each with one or more of the following
- properties (all but one of text or html optional):
- * "author" - human-readable name of the comment author
- * "author_id" - user ID of the comment author
- * "id" - Comment ID
- * "html" - Comment as HTML
- * "text" - Plain text of the comment
- * "timestamp" - UNIX timestamp of comment
- * "parent" - ID of the comment this one is replying to.
- Set to "root" to indicate that this is a
- comment to the original video.
- age_limit: Age restriction for the video, as an integer (years)
- webpage_url: The URL to the video webpage, if given to youtube-dl it
- should allow to get the same result again. (It will be set
- by YoutubeDL if it's missing)
- categories: A list of categories that the video falls in, for example
- ["Sports", "Berlin"]
- tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
- is_live: True, False, or None (=unknown). Whether this video is a
- live stream that goes on instead of a fixed-length video.
- start_time: Time in seconds where the reproduction should start, as
- specified in the URL.
- end_time: Time in seconds where the reproduction should end, as
- specified in the URL.
- chapters: A list of dictionaries, with the following entries:
- * "start_time" - The start time of the chapter in seconds
- * "end_time" - The end time of the chapter in seconds
- * "title" (optional, string)
-
- The following fields should only be used when the video belongs to some logical
- chapter or section:
-
- chapter: Name or title of the chapter the video belongs to.
- chapter_number: Number of the chapter the video belongs to, as an integer.
- chapter_id: Id of the chapter the video belongs to, as a unicode string.
-
- The following fields should only be used when the video is an episode of some
- series, programme or podcast:
-
- series: Title of the series or programme the video episode belongs to.
- season: Title of the season the video episode belongs to.
- season_number: Number of the season the video episode belongs to, as an integer.
- season_id: Id of the season the video episode belongs to, as a unicode string.
- episode: Title of the video episode. Unlike mandatory video title field,
- this field should denote the exact title of the video episode
- without any kind of decoration.
- episode_number: Number of the video episode within a season, as an integer.
- episode_id: Id of the video episode, as a unicode string.
-
- The following fields should only be used when the media is a track or a part of
- a music album:
-
- track: Title of the track.
- track_number: Number of the track within an album or a disc, as an integer.
- track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
- as a unicode string.
- artist: Artist(s) of the track.
- genre: Genre(s) of the track.
- album: Title of the album the track belongs to.
- album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
- album_artist: List of all artists appeared on the album (e.g.
- "Ash Borer / Fell Voices" or "Various Artists", useful for splits
- and compilations).
- disc_number: Number of the disc or other physical medium the track belongs to,
- as an integer.
- release_year: Year (YYYY) when the album was released.
-
- Unless mentioned otherwise, the fields should be Unicode strings.
-
- Unless mentioned otherwise, None is equivalent to absence of information.
-
-
- _type "playlist" indicates multiple videos.
- There must be a key "entries", which is a list, an iterable, or a PagedList
- object, each element of which is a valid dictionary by this specification.
-
- Additionally, playlists can have "id", "title", "description", "uploader",
- "uploader_id", "uploader_url" attributes with the same semantics as videos
- (see above).
-
-
- _type "multi_video" indicates that there are multiple videos that
- form a single show, for examples multiple acts of an opera or TV episode.
- It must have an entries key like a playlist and contain all the keys
- required for a video at the same time.
-
-
- _type "url" indicates that the video must be extracted from another
- location, possibly by a different extractor. Its only required key is:
- "url" - the next URL to extract.
- The key "ie_key" can be set to the class name (minus the trailing "IE",
- e.g. "Youtube") if the extractor class is known in advance.
- Additionally, the dictionary may have any properties of the resolved entity
- known in advance, for example "title" if the title of the referred video is
- known ahead of time.
-
-
- _type "url_transparent" entities have the same specification as "url", but
- indicate that the given additional information is more precise than the one
- associated with the resolved URL.
- This is useful when a site employs a video service that hosts the video and
- its technical metadata, but that video service does not embed a useful
- title, description etc.
-
-
- Subclasses of this one should re-define the _real_initialize() and
- _real_extract() methods and define a _VALID_URL regexp.
- Probably, they should also be added to the list of extractors.
-
- _GEO_BYPASS attribute may be set to False in order to disable
- geo restriction bypass mechanisms for a particular extractor.
- Though it won't disable explicit geo restriction bypass based on
- country code provided with geo_bypass_country.
-
- _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
- countries for this extractor. One of these countries will be used by
- geo restriction bypass mechanism right away in order to bypass
- geo restriction, of course, if the mechanism is not disabled.
-
- _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
- IP blocks in CIDR notation for this extractor. One of these IP blocks
- will be used by geo restriction bypass mechanism similarly
- to _GEO_COUNTRIES.
-
- Finally, the _WORKING attribute should be set to False for broken IEs
- in order to warn the users and skip the tests.
- """
-
- _ready = False
- _downloader = None
- _x_forwarded_for_ip = None
- _GEO_BYPASS = True
- _GEO_COUNTRIES = None
- _GEO_IP_BLOCKS = None
- _WORKING = True
-
- def __init__(self, downloader=None):
- """Constructor. Receives an optional downloader."""
- self._ready = False
- self._x_forwarded_for_ip = None
- self.set_downloader(downloader)
-
- @classmethod
- def suitable(cls, url):
- """Receives a URL and returns True if suitable for this IE."""
-
- # This does not use has/getattr intentionally - we want to know whether
- # we have cached the regexp for *this* class, whereas getattr would also
- # match the superclass
- if '_VALID_URL_RE' not in cls.__dict__:
- cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- return cls._VALID_URL_RE.match(url) is not None
-
- @classmethod
- def _match_id(cls, url):
- if '_VALID_URL_RE' not in cls.__dict__:
- cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- m = cls._VALID_URL_RE.match(url)
- assert m
- return compat_str(m.group('id'))
-
- @classmethod
- def working(cls):
- """Getter method for _WORKING."""
- return cls._WORKING
-
- def initialize(self):
- """Initializes an instance (authentication, etc)."""
- self._initialize_geo_bypass({
- 'countries': self._GEO_COUNTRIES,
- 'ip_blocks': self._GEO_IP_BLOCKS,
- })
- if not self._ready:
- self._real_initialize()
- self._ready = True
-
- def _initialize_geo_bypass(self, geo_bypass_context):
- """
- Initialize geo restriction bypass mechanism.
-
- This method is used to initialize geo bypass mechanism based on faking
- X-Forwarded-For HTTP header. A random country from provided country list
- is selected and a random IP belonging to this country is generated. This
- IP will be passed as X-Forwarded-For HTTP header in all subsequent
- HTTP requests.
-
- This method will be used for initial geo bypass mechanism initialization
- during the instance initialization with _GEO_COUNTRIES and
- _GEO_IP_BLOCKS.
-
- You may also manually call it from extractor's code if geo bypass
- information is not available beforehand (e.g. obtained during
- extraction) or due to some other reason. In this case you should pass
- this information in geo bypass context passed as first argument. It may
- contain following fields:
-
- countries: List of geo unrestricted countries (similar
- to _GEO_COUNTRIES)
- ip_blocks: List of geo unrestricted IP blocks in CIDR notation
- (similar to _GEO_IP_BLOCKS)
-
- """
- if not self._x_forwarded_for_ip:
-
- # Geo bypass mechanism is explicitly disabled by user
- if not self._downloader.params.get('geo_bypass', True):
- return
-
- if not geo_bypass_context:
- geo_bypass_context = {}
-
- # Backward compatibility: previously _initialize_geo_bypass
- # expected a list of countries, some 3rd party code may still use
- # it this way
- if isinstance(geo_bypass_context, (list, tuple)):
- geo_bypass_context = {
- 'countries': geo_bypass_context,
- }
-
- # The whole point of geo bypass mechanism is to fake IP
- # as X-Forwarded-For HTTP header based on some IP block or
- # country code.
-
- # Path 1: bypassing based on IP block in CIDR notation
-
- # Explicit IP block specified by user, use it right away
- # regardless of whether extractor is geo bypassable or not
- ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
-
- # Otherwise use random IP block from geo bypass context but only
- # if extractor is known as geo bypassable
- if not ip_block:
- ip_blocks = geo_bypass_context.get('ip_blocks')
- if self._GEO_BYPASS and ip_blocks:
- ip_block = random.choice(ip_blocks)
-
- if ip_block:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s as X-Forwarded-For.'
- % self._x_forwarded_for_ip)
- return
-
- # Path 2: bypassing based on country code
-
- # Explicit country code specified by user, use it right away
- # regardless of whether extractor is geo bypassable or not
- country = self._downloader.params.get('geo_bypass_country', None)
-
- # Otherwise use random country code from geo bypass context but
- # only if extractor is known as geo bypassable
- if not country:
- countries = geo_bypass_context.get('countries')
- if self._GEO_BYPASS and countries:
- country = random.choice(countries)
-
- if country:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country.upper()))
-
- def extract(self, url):
- """Extracts URL information and returns it in list of dicts."""
- try:
- for _ in range(2):
- try:
- self.initialize()
- ie_result = self._real_extract(url)
- if self._x_forwarded_for_ip:
- ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
- return ie_result
- except GeoRestrictedError as e:
- if self.__maybe_fake_ip_and_retry(e.countries):
- continue
- raise
- except ExtractorError:
- raise
- except compat_http_client.IncompleteRead as e:
- raise ExtractorError('A network error has occurred.', cause=e, expected=True)
- except (KeyError, StopIteration) as e:
- raise ExtractorError('An extractor error has occurred.', cause=e)
-
- def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None) and
- self._GEO_BYPASS and
- self._downloader.params.get('geo_bypass', True) and
- not self._x_forwarded_for_ip and
- countries):
- country_code = random.choice(countries)
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
- if self._x_forwarded_for_ip:
- self.report_warning(
- 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country_code.upper()))
- return True
- return False
-
- def set_downloader(self, downloader):
- """Sets the downloader for this IE."""
- self._downloader = downloader
-
- def _real_initialize(self):
- """Real initialization process. Redefine in subclasses."""
- pass
-
- def _real_extract(self, url):
- """Real extraction process. Redefine in subclasses."""
- pass
-
- @classmethod
- def ie_key(cls):
- """A string for getting the InfoExtractor with get_info_extractor"""
- return compat_str(cls.__name__[:-2])
-
- @property
- def IE_NAME(self):
- return compat_str(type(self).__name__[:-2])
-
- @staticmethod
- def __can_accept_status_code(err, expected_status):
- assert isinstance(err, compat_urllib_error.HTTPError)
- if expected_status is None:
- return False
- if isinstance(expected_status, compat_integer_types):
- return err.code == expected_status
- elif isinstance(expected_status, (list, tuple)):
- return err.code in expected_status
- elif callable(expected_status):
- return expected_status(err.code) is True
- else:
- assert False
-
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
- """
- Return the response handle.
-
- See _download_webpage docstring for arguments specification.
- """
- if note is None:
- self.report_download_webpage(video_id)
- elif note is not False:
- if video_id is None:
- self.to_screen('%s' % (note,))
- else:
- self.to_screen('%s: %s' % (video_id, note))
-
- # Some sites check X-Forwarded-For HTTP header in order to figure out
- # the origin of the client behind proxy. This allows bypassing geo
- # restriction by faking this header's value to IP that belongs to some
- # geo unrestricted country. We will do so once we encounter any
- # geo restriction error.
- if self._x_forwarded_for_ip:
- if 'X-Forwarded-For' not in headers:
- headers['X-Forwarded-For'] = self._x_forwarded_for_ip
-
- if isinstance(url_or_request, compat_urllib_request.Request):
- url_or_request = update_Request(
- url_or_request, data=data, headers=headers, query=query)
- else:
- if query:
- url_or_request = update_url_query(url_or_request, query)
- if data is not None or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers)
- try:
- return self._downloader.urlopen(url_or_request)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- if isinstance(err, compat_urllib_error.HTTPError):
- if self.__can_accept_status_code(err, expected_status):
- return err.fp
-
- if errnote is False:
- return False
- if errnote is None:
- errnote = 'Unable to download webpage'
-
- errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
- if fatal:
- raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
- else:
- self._downloader.report_warning(errmsg)
- return False
-
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
- """
- Return a tuple (page content as string, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- # Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
- url_or_request = url_or_request.partition('#')[0]
-
- urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
- if urlh is False:
- assert not fatal
- return False
- content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
- return (content, urlh)
-
- @staticmethod
- def _guess_encoding_from_content(content_type, webpage_bytes):
- m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
- if m:
- encoding = m.group(1)
- else:
- m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
- webpage_bytes[:1024])
- if m:
- encoding = m.group(1).decode('ascii')
- elif webpage_bytes.startswith(b'\xff\xfe'):
- encoding = 'utf-16'
- else:
- encoding = 'utf-8'
-
- return encoding
-
- def __check_blocked(self, content):
- first_block = content[:512]
- if ('<title>Access to this site is blocked</title>' in content and
- 'Websense' in first_block):
- msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
- blocked_iframe = self._html_search_regex(
- r'<iframe src="([^"]+)"', content,
- 'Websense information URL', default=None)
- if blocked_iframe:
- msg += ' Visit %s for more details' % blocked_iframe
- raise ExtractorError(msg, expected=True)
- if '<title>The URL you requested has been blocked</title>' in first_block:
- msg = (
- 'Access to this webpage has been blocked by Indian censorship. '
- 'Use a VPN or proxy server (with --proxy) to route around it.')
- block_msg = self._html_search_regex(
- r'</h1><p>(.*?)</p>',
- content, 'block message', default=None)
- if block_msg:
- msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
- raise ExtractorError(msg, expected=True)
- if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
- 'blocklist.rkn.gov.ru' in content):
- raise ExtractorError(
- 'Access to this webpage has been blocked by decision of the Russian government. '
- 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
- expected=True)
-
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
- content_type = urlh.headers.get('Content-Type', '')
- webpage_bytes = urlh.read()
- if prefix is not None:
- webpage_bytes = prefix + webpage_bytes
- if not encoding:
- encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
- if self._downloader.params.get('dump_intermediate_pages', False):
- self.to_screen('Dumping request to ' + urlh.geturl())
- dump = base64.b64encode(webpage_bytes).decode('ascii')
- self._downloader.to_screen(dump)
- if self._downloader.params.get('write_pages', False):
- basen = '%s_%s' % (video_id, urlh.geturl())
- if len(basen) > 240:
- h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:240 - len(h)] + h
- raw_filename = basen + '.dump'
- filename = sanitize_filename(raw_filename, restricted=True)
- self.to_screen('Saving request to ' + filename)
- # Working around MAX_PATH limitation on Windows (see
- # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
- if compat_os_name == 'nt':
- absfilepath = os.path.abspath(filename)
- if len(absfilepath) > 259:
- filename = '\\\\?\\' + absfilepath
- with open(filename, 'wb') as outf:
- outf.write(webpage_bytes)
-
- try:
- content = webpage_bytes.decode(encoding, 'replace')
- except LookupError:
- content = webpage_bytes.decode('utf-8', 'replace')
-
- self.__check_blocked(content)
-
- return content
-
- def _download_webpage(
- self, url_or_request, video_id, note=None, errnote=None,
- fatal=True, tries=1, timeout=5, encoding=None, data=None,
- headers={}, query={}, expected_status=None):
- """
- Return the data of the page as a string.
-
- Arguments:
- url_or_request -- plain text URL as a string or
- a compat_urllib_request.Requestobject
- video_id -- Video/playlist/item identifier (string)
-
- Keyword arguments:
- note -- note printed before downloading (string)
- errnote -- note printed in case of an error (string)
- fatal -- flag denoting whether error should be considered fatal,
- i.e. whether it should cause ExtractionError to be raised,
- otherwise a warning will be reported and extraction continued
- tries -- number of tries
- timeout -- sleep interval between tries
- encoding -- encoding for a page content decoding, guessed automatically
- when not explicitly specified
- data -- POST data (bytes)
- headers -- HTTP headers (dict)
- query -- URL query (dict)
- expected_status -- allows to accept failed HTTP requests (non 2xx
- status code) by explicitly specifying a set of accepted status
- codes. Can be any of the following entities:
- - an integer type specifying an exact failed status code to
- accept
- - a list or a tuple of integer types specifying a list of
- failed status codes to accept
- - a callable accepting an actual failed status code and
- returning True if it should be accepted
- Note that this argument does not affect success status codes (2xx)
- which are always accepted.
- """
-
- success = False
- try_count = 0
- while success is False:
- try:
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- success = True
- except compat_http_client.IncompleteRead as e:
- try_count += 1
- if try_count >= tries:
- raise e
- self._sleep(timeout, video_id)
- if res is False:
- return res
- else:
- content, _ = res
- return content
-
- def _download_xml_handle(
- self, url_or_request, video_id, note='Downloading XML',
- errnote='Unable to download XML', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- xml_string, urlh = res
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_xml(
- self, url_or_request, video_id,
- note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None,
- data=None, headers={}, query={}, expected_status=None):
- """
- Return the xml as an xml.etree.ElementTree.Element.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_xml_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- xml_string = transform_source(xml_string)
- try:
- return compat_etree_fromstring(xml_string.encode('utf-8'))
- except compat_xml_parse_error as ve:
- errmsg = '%s: Failed to parse XML ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
- def _download_json_handle(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- json_string, urlh = res
- return self._parse_json(
- json_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_json(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- json_string = transform_source(json_string)
- try:
- return json.loads(json_string)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
def report_warning(self, msg, video_id=None):
    """Forward a warning to the downloader, prefixed with '[ie_name]' and,
    when video_id is given, with 'video_id: '."""
    if video_id is None:
        id_prefix = ''
    else:
        id_prefix = '%s: ' % video_id
    full_message = '[%s] %s%s' % (self.IE_NAME, id_prefix, msg)
    self._downloader.report_warning(full_message)
-
def to_screen(self, msg):
    """Print msg to screen, prefixing it with '[ie_name]'"""
    prefixed = '[%s] %s' % (self.IE_NAME, msg)
    self._downloader.to_screen(prefixed)
-
def report_extraction(self, id_or_name):
    """Report information extraction."""
    message = '%s: Extracting information' % id_or_name
    self.to_screen(message)
-
def report_download_webpage(self, video_id):
    """Report webpage download."""
    message = '%s: Downloading webpage' % video_id
    self.to_screen(message)
-
def report_age_confirmation(self):
    """Report attempt to confirm age."""
    message = 'Confirming age'
    self.to_screen(message)
-
def report_login(self):
    """Report attempt to log in."""
    message = 'Logging in'
    self.to_screen(message)
-
@staticmethod
def raise_login_required(msg='This video is only available for registered users'):
    """Raise an expected ExtractorError telling the user that account
    credentials are needed; msg is the leading sentence of the error."""
    message = '%s. Use --username and --password or --netrc to provide account credentials.' % msg
    raise ExtractorError(message, expected=True)
-
@staticmethod
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
    """Raise GeoRestrictedError with the given message and countries."""
    error = GeoRestrictedError(msg, countries=countries)
    raise error
-
- # Methods for following #608
- @staticmethod
- def url_result(url, ie=None, video_id=None, video_title=None):
- """Returns a URL that points to a page that should be processed"""
- # TODO: ie should be the class used for getting the info
- video_info = {'_type': 'url',
- 'url': url,
- 'ie_key': ie}
- if video_id is not None:
- video_info['id'] = video_id
- if video_title is not None:
- video_info['title'] = video_title
- return video_info
-
def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
    """Build a playlist result from an iterable of matches.

    Each match (or getter(match) when getter is given) is passed through
    _proto_relative_url and wrapped with url_result; the entries are
    de-duplicated with orderedSet.
    """
    def to_entry(match):
        target = getter(match) if getter else match
        return self.url_result(self._proto_relative_url(target), ie)

    entries = orderedSet(to_entry(m) for m in matches)
    return self.playlist_result(
        entries, playlist_id=playlist_id, playlist_title=playlist_title)
-
- @staticmethod
- def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
- """Returns a playlist"""
- video_info = {'_type': 'playlist',
- 'entries': entries}
- if playlist_id:
- video_info['id'] = playlist_id
- if playlist_title:
- video_info['title'] = playlist_title
- if playlist_description:
- video_info['description'] = playlist_description
- return video_info
-
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Perform a regex search on the given string, using a single or a list of
    patterns returning the first matching group.
    In case of failure return a default value or raise a WARNING or a
    RegexNotFoundError, depending on fatal, specifying the field name.

    When group is None the first group that took part in the match is
    returned; otherwise the named/numbered group is returned.
    """
    # Start from "no match" so that an empty list of patterns falls through
    # to the default/fatal/warning handling below instead of leaving mobj
    # unbound.
    mobj = None
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        mobj = re.search(pattern, string, flags)
    else:
        for p in pattern:
            mobj = re.search(p, string, flags)
            if mobj:
                break

    # Colorize the field name only on a non-Windows tty with colors enabled
    if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
        _name = '\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if mobj:
        if group is None:
            # return the first matching group
            return next(g for g in mobj.groups() if g is not None)
        else:
            return mobj.group(group)
    elif default is not NO_DEFAULT:
        return default
    elif fatal:
        raise RegexNotFoundError('Unable to extract %s' % _name)
    else:
        self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
        return None
-
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Like _search_regex, but strips HTML tags and unescapes entities.
    """
    found = self._search_regex(pattern, string, name, default, fatal, flags, group)
    if not found:
        # Falsy results (None, '', a falsy default) are passed through as is.
        return found
    return clean_html(found).strip()
-
- def _get_netrc_login_info(self, netrc_machine=None):
- username = None
- password = None
- netrc_machine = netrc_machine or self._NETRC_MACHINE
-
- if self._downloader.params.get('usenetrc', False):
- try:
- info = netrc.netrc().authenticators(netrc_machine)
- if info is not None:
- username = info[0]
- password = info[2]
- else:
- raise netrc.NetrcParseError(
- 'No authenticators for %s' % netrc_machine)
- except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(
- 'parsing .netrc: %s' % error_to_compat_str(err))
-
- return username, password
-
- def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
- """
- Get the login info as (username, password)
- First look for the manually specified credentials using username_option
- and password_option as keys in params dictionary. If no such credentials
- available look in the netrc file using the netrc_machine or _NETRC_MACHINE
- value.
- If there's no info available, return (None, None)
- """
- if self._downloader is None:
- return (None, None)
-
- downloader_params = self._downloader.params
-
- # Attempt to use provided username and password or .netrc data
- if downloader_params.get(username_option) is not None:
- username = downloader_params[username_option]
- password = downloader_params[password_option]
- else:
- username, password = self._get_netrc_login_info(netrc_machine)
-
- return username, password
-
- def _get_tfa_info(self, note='two-factor verification code'):
- """
- Get the two-factor authentication info
- TODO - asking the user will be required for sms/phone verify
- currently just uses the command line option
- If there's no info available, return None
- """
- if self._downloader is None:
- return None
- downloader_params = self._downloader.params
-
- if downloader_params.get('twofactor') is not None:
- return downloader_params['twofactor']
-
- return compat_getpass('Type %s and press [Return]: ' % note)
-
- # Helper functions for extracting OpenGraph info
- @staticmethod
- def _og_regexes(prop):
- content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
- property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
- % {'prop': re.escape(prop)})
- template = r'<meta[^>]+?%s[^>]+?%s'
- return [
- template % (property_re, content_re),
- template % (content_re, property_re),
- ]
-
- @staticmethod
- def _meta_regex(prop):
- return r'''(?isx)<meta
- (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
- [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
-
def _og_search_property(self, prop, html, name=None, **kargs):
    """Search html for one or several OpenGraph properties and return the
    HTML-unescaped content of the first match.

    prop may be a single property name or a list/tuple of names. name is
    the field name passed on to _search_regex; it defaults to
    'OpenGraph <first property>'. Extra keyword arguments are passed on to
    _search_regex.
    """
    props = prop if isinstance(prop, (list, tuple)) else [prop]
    if name is None:
        name = 'OpenGraph %s' % props[0]
    og_regexes = [regex for p in props for regex in self._og_regexes(p)]
    escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
    return None if escaped is None else unescapeHTML(escaped)
-
- def _og_search_thumbnail(self, html, **kargs):
- return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
-
- def _og_search_description(self, html, **kargs):
- return self._og_search_property('description', html, fatal=False, **kargs)
-
- def _og_search_title(self, html, **kargs):
- return self._og_search_property('title', html, **kargs)
-
- def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
- regexes = self._og_regexes('video') + self._og_regexes('video:url')
- if secure:
- regexes = self._og_regexes('video:secure_url') + regexes
- return self._html_search_regex(regexes, html, name, **kargs)
-
- def _og_search_url(self, html, **kargs):
- return self._og_search_property('url', html, **kargs)
-
- def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
- if not isinstance(name, (list, tuple)):
- name = [name]
- if display_name is None:
- display_name = name[0]
- return self._html_search_regex(
- [self._meta_regex(n) for n in name],
- html, display_name, fatal=fatal, group='content', **kwargs)
-
- def _dc_search_uploader(self, html):
- return self._html_search_meta('dc.creator', html, 'uploader')
-
- def _rta_search(self, html):
- # See http://www.rtalabel.org/index.php?content=howtofaq#single
- if re.search(r'(?ix)<meta\s+name="rating"\s+'
- r' content="RTA-5042-1996-1400-1577-RTA"',
- html):
- return 18
- return 0
-
- def _media_rating_search(self, html):
- # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
- rating = self._html_search_meta('rating', html)
-
- if not rating:
- return None
-
- RATING_TABLE = {
- 'safe for kids': 0,
- 'general': 8,
- '14 years': 14,
- 'mature': 17,
- 'restricted': 19,
- }
- return RATING_TABLE.get(rating.lower())
-
- def _family_friendly_search(self, html):
- # See http://schema.org/VideoObject
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', html, default=None)
-
- if not family_friendly:
- return None
-
- RATING_TABLE = {
- '1': 0,
- 'true': 0,
- '0': 18,
- 'false': 18,
- }
- return RATING_TABLE.get(family_friendly.lower())
-
- def _twitter_search_player(self, html):
- return self._html_search_meta('twitter:player', html,
- 'twitter card player')
-
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
    """Locate a JSON-LD block in html and extract info from it via _json_ld.

    Keyword arguments are passed on to _search_regex. When no JSON-LD is
    found, the 'default' keyword value is returned if one was given,
    otherwise an empty dict.
    """
    json_ld = self._search_regex(
        JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
    default = kwargs.get('default', NO_DEFAULT)
    if not json_ld:
        return default if default is not NO_DEFAULT else {}
    # JSON-LD may be malformed and thus `fatal` should be respected.
    # At the same time `default` may be passed that assumes `fatal=False`
    # for _search_regex. Let's simulate the same behavior here as well.
    # NO_DEFAULT is a sentinel, so test identity (as above), not equality:
    # a caller-supplied default with a custom __eq__ must not be mistaken
    # for it.
    fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
    return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
-
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
    """Extract an info dict from JSON-LD data.

    json_ld may be a JSON string (parsed with _parse_json, honouring fatal),
    a dict, or a list/tuple of dicts. Only entries whose '@context' is
    schema.org are considered. Handled '@type' values are TVEpisode/Episode,
    Article/NewsArticle and VideoObject (directly or nested under 'video').
    If expected_type is given, processing stops at the first schema.org
    entry of a different type.

    Returns a dict containing only the keys whose value is not None; an
    empty dict when nothing usable is found.
    """
    if isinstance(json_ld, compat_str):
        json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
    if not json_ld:
        return {}
    info = {}
    if not isinstance(json_ld, (list, tuple, dict)):
        return info
    if isinstance(json_ld, dict):
        json_ld = [json_ld]

    # Last path component of schema.org interactionType -> info key prefix
    INTERACTION_TYPE_MAP = {
        'CommentAction': 'comment',
        'AgreeAction': 'like',
        'DisagreeAction': 'dislike',
        'LikeAction': 'like',
        'DislikeAction': 'dislike',
        'ListenAction': 'view',
        'WatchAction': 'view',
        'ViewAction': 'view',
    }

    def extract_interaction_statistic(e):
        # Fill '<kind>_count' keys from InteractionCounter entries without
        # overriding a count that is already known.
        interaction_statistic = e.get('interactionStatistic')
        if not isinstance(interaction_statistic, list):
            return
        for is_e in interaction_statistic:
            if not isinstance(is_e, dict):
                continue
            if is_e.get('@type') != 'InteractionCounter':
                continue
            interaction_type = is_e.get('interactionType')
            if not isinstance(interaction_type, compat_str):
                continue
            interaction_count = int_or_none(is_e.get('userInteractionCount'))
            if interaction_count is None:
                continue
            count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
            if not count_kind:
                continue
            count_key = '%s_count' % count_kind
            if info.get(count_key) is not None:
                continue
            info[count_key] = interaction_count

    def extract_video_object(e):
        assert e['@type'] == 'VideoObject'
        info.update({
            'url': e.get('contentUrl'),
            'title': unescapeHTML(e.get('name')),
            'description': unescapeHTML(e.get('description')),
            'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
            'duration': parse_duration(e.get('duration')),
            'timestamp': unified_timestamp(e.get('uploadDate')),
            'filesize': float_or_none(e.get('contentSize')),
            'tbr': int_or_none(e.get('bitrate')),
            'width': int_or_none(e.get('width')),
            'height': int_or_none(e.get('height')),
            'view_count': int_or_none(e.get('interactionCount')),
        })
        extract_interaction_statistic(e)

    for e in json_ld:
        # Lists may contain arbitrary JSON values; only objects can carry
        # an '@context'.
        if not isinstance(e, dict):
            continue
        if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
            item_type = e.get('@type')
            if expected_type is not None and expected_type != item_type:
                # Stop here, but leave through the common exit so that
                # None-valued keys collected from an earlier VideoObject
                # are filtered out as on every other path.
                break
            if item_type in ('TVEpisode', 'Episode'):
                info.update({
                    'episode': unescapeHTML(e.get('name')),
                    'episode_number': int_or_none(e.get('episodeNumber')),
                    'description': unescapeHTML(e.get('description')),
                })
                part_of_season = e.get('partOfSeason')
                if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
                    info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                    info['series'] = unescapeHTML(part_of_series.get('name'))
            elif item_type in ('Article', 'NewsArticle'):
                info.update({
                    'timestamp': parse_iso8601(e.get('datePublished')),
                    'title': unescapeHTML(e.get('headline')),
                    'description': unescapeHTML(e.get('articleBody')),
                })
            elif item_type == 'VideoObject':
                extract_video_object(e)
                # A VideoObject does not end the scan; later entries may
                # still contribute.
                continue
            video = e.get('video')
            if isinstance(video, dict) and video.get('@type') == 'VideoObject':
                extract_video_object(video)
            break
    return dict((k, v) for k, v in info.items() if v is not None)
-
@staticmethod
def _hidden_inputs(html):
    """Return a dict of name -> value for the hidden and submit <input>
    tags found in html (HTML comments are stripped first).

    The key is the tag's name attribute, falling back to its id; tags
    without a key or without a value attribute are skipped.
    """
    uncommented = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
    collected = {}
    for input_tag in re.findall(r'(?i)(<input[^>]+>)', uncommented):
        attrs = extract_attributes(input_tag)
        if attrs.get('type') not in ('hidden', 'submit'):
            continue
        key = attrs.get('name') or attrs.get('id')
        value = attrs.get('value')
        if not key or value is None:
            continue
        collected[key] = value
    return collected
-
- def _form_hidden_inputs(self, form_id, html):
- form = self._search_regex(
- r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
- html, '%s form' % form_id, group='form')
- return self._hidden_inputs(form)
-
def _sort_formats(self, formats, field_preference=None):
    """Sort formats in place in ascending order of the computed sort key.

    Raises ExtractorError when formats is empty. Formats that lack 'tbr'
    but have both 'abr' and 'vbr' get 'tbr' set to their sum. When
    field_preference is a list or tuple of field names, the sort key is
    made of just those fields; otherwise the built-in key below is used.
    """
    if not formats:
        raise ExtractorError('No video formats found')

    for f in formats:
        # Automatically determine tbr when missing based on abr and vbr (improves
        # formats sorting in some cases)
        if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
            f['tbr'] = f['abr'] + f['vbr']

    def _formats_key(f):
        # Note: this key function also fills in a missing f['ext'] as a
        # side effect.
        # TODO remove the following workaround
        from ..utils import determine_ext
        if not f.get('ext') and 'url' in f:
            f['ext'] = determine_ext(f['url'])

        # Caller-supplied ordering: missing fields sort as -1 ('' for
        # format_id, which is compared as a string)
        if isinstance(field_preference, (list, tuple)):
            return tuple(
                f.get(field)
                if f.get(field) is not None
                else ('' if field == 'format_id' else -1)
                for field in field_preference)

        preference = f.get('preference')
        if preference is None:
            preference = 0
            if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported
                preference -= 0.5

        protocol = f.get('protocol') or determine_protocol(f)
        # http(s) ranks highest, rtsp lowest, everything else in between
        proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)

        if f.get('vcodec') == 'none':  # audio only
            preference -= 50
            # The position in ORDER is the rank: later entries sort higher
            if self._downloader.params.get('prefer_free_formats'):
                ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
            else:
                ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
            ext_preference = 0
            try:
                audio_ext_preference = ORDER.index(f['ext'])
            except ValueError:
                audio_ext_preference = -1
        else:
            if f.get('acodec') == 'none':  # video only
                preference -= 40
            if self._downloader.params.get('prefer_free_formats'):
                ORDER = ['flv', 'mp4', 'webm']
            else:
                ORDER = ['webm', 'flv', 'mp4']
            try:
                ext_preference = ORDER.index(f['ext'])
            except ValueError:
                ext_preference = -1
            audio_ext_preference = 0

        # Tuple elements are compared left to right; missing numeric
        # fields sort as -1 and a missing format_id as ''
        return (
            preference,
            f.get('language_preference') if f.get('language_preference') is not None else -1,
            f.get('quality') if f.get('quality') is not None else -1,
            f.get('tbr') if f.get('tbr') is not None else -1,
            f.get('filesize') if f.get('filesize') is not None else -1,
            f.get('vbr') if f.get('vbr') is not None else -1,
            f.get('height') if f.get('height') is not None else -1,
            f.get('width') if f.get('width') is not None else -1,
            proto_preference,
            ext_preference,
            f.get('abr') if f.get('abr') is not None else -1,
            audio_ext_preference,
            f.get('fps') if f.get('fps') is not None else -1,
            f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
            f.get('source_preference') if f.get('source_preference') is not None else -1,
            f.get('format_id') if f.get('format_id') is not None else '',
        )
    formats.sort(key=_formats_key)
-
- def _check_formats(self, formats, video_id):
- if formats:
- formats[:] = filter(
- lambda f: self._is_valid_url(
- f['url'], video_id,
- item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
- formats)
-
- @staticmethod
- def _remove_duplicate_formats(formats):
- format_urls = set()
- unique_formats = []
- for f in formats:
- if f['url'] not in format_urls:
- format_urls.add(f['url'])
- unique_formats.append(f)
- formats[:] = unique_formats
-
- def _is_valid_url(self, url, video_id, item='video', headers={}):
- url = self._proto_relative_url(url, scheme='http:')
- # For now assume non HTTP(S) URLs always valid
- if not (url.startswith('http://') or url.startswith('https://')):
- return True
- try:
- self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
- return True
- except ExtractorError as e:
- if isinstance(e.cause, compat_urllib_error.URLError):
- self.to_screen(
- '%s: %s URL is invalid, skipping' % (video_id, item))
- return False
- raise
-
def http_scheme(self):
    """ Either "http:" or "https:", depending on the user's preferences """
    if self._downloader.params.get('prefer_insecure', False):
        return 'http:'
    return 'https:'
-
- def _proto_relative_url(self, url, scheme=None):
- if url is None:
- return url
- if url.startswith('//'):
- if scheme is None:
- scheme = self.http_scheme()
- return scheme + url
- else:
- return url
-
- def _sleep(self, timeout, video_id, msg_template=None):
- if msg_template is None:
- msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
- msg = msg_template % {'video_id': video_id, 'timeout': timeout}
- self.to_screen(msg)
- time.sleep(timeout)
-
- def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
- transform_source=lambda s: fix_xml_ampersands(s).strip(),
- fatal=True, m3u8_id=None):
- manifest = self._download_xml(
- manifest_url, video_id, 'Downloading f4m manifest',
- 'Unable to download f4m manifest',
- # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
- transform_source=transform_source,
- fatal=fatal)
-
- if manifest is False:
- return []
-
- return self._parse_f4m_formats(
- manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
- transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
-
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
                       transform_source=lambda s: fix_xml_ampersands(s).strip(),
                       fatal=True, m3u8_id=None):
    """Build the list of formats described by a parsed f4m manifest.

    manifest is the parsed manifest element and manifest_url the URL it was
    fetched from (used to resolve relative media URLs). Media entries that
    point to another f4m or to an m3u8 manifest are extracted recursively.
    Returns an empty list when the manifest carries a player verification
    challenge or has no usable media nodes.
    """
    # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
    akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
    # An element without text has .text set to None, so guard before
    # searching it.
    if akamai_pv is not None and akamai_pv.text and ';' in akamai_pv.text:
        playerVerificationChallenge = akamai_pv.text.split(';')[0]
        if playerVerificationChallenge.strip() != '':
            return []

    formats = []
    manifest_version = '1.0'
    media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
    if not media_nodes:
        manifest_version = '2.0'
        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
    # Remove unsupported DRM protected media from final formats
    # rendition (see https://github.com/rg3/youtube-dl/issues/8573).
    media_nodes = remove_encrypted_media(media_nodes)
    if not media_nodes:
        return formats

    manifest_base_url = get_base_url(manifest)

    bootstrap_info = xpath_element(
        manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
        'bootstrap info', default=None)

    vcodec = None
    mime_type = xpath_text(
        manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
        'base URL', default=None)
    if mime_type and mime_type.startswith('audio/'):
        vcodec = 'none'

    for i, media_el in enumerate(media_nodes):
        tbr = int_or_none(media_el.attrib.get('bitrate'))
        width = int_or_none(media_el.attrib.get('width'))
        height = int_or_none(media_el.attrib.get('height'))
        format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
        # URL of this particular media entry. It is kept separate from
        # manifest_url so that every relative media URL is resolved
        # against the manifest itself, not against the URL computed for
        # the previous media entry.
        media_manifest_url = manifest_url
        # If <bootstrapInfo> is present, the specified f4m is a
        # stream-level manifest, and only set-level manifests may refer to
        # external resources. See section 11.4 and section 4 of F4M spec
        if bootstrap_info is None:
            media_url = None
            # @href is introduced in 2.0, see section 11.6 of F4M spec
            if manifest_version == '2.0':
                media_url = media_el.attrib.get('href')
            if media_url is None:
                media_url = media_el.attrib.get('url')
            if not media_url:
                continue
            media_manifest_url = (
                media_url if media_url.startswith('http://') or media_url.startswith('https://')
                else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
            # If media_url is itself a f4m manifest do the recursive extraction
            # since bitrates in parent manifest (this one) and media_url manifest
            # may differ leading to inability to resolve the format by requested
            # bitrate in f4m downloader
            ext = determine_ext(media_manifest_url)
            if ext == 'f4m':
                f4m_formats = self._extract_f4m_formats(
                    media_manifest_url, video_id, preference=preference, f4m_id=f4m_id,
                    transform_source=transform_source, fatal=fatal)
                # Sometimes stream-level manifest contains single media entry that
                # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
                # At the same time parent's media entry in set-level manifest may
                # contain it. We will copy it from parent in such cases.
                if len(f4m_formats) == 1:
                    f = f4m_formats[0]
                    f.update({
                        'tbr': f.get('tbr') or tbr,
                        'width': f.get('width') or width,
                        'height': f.get('height') or height,
                        'format_id': f.get('format_id') if not tbr else format_id,
                        'vcodec': vcodec,
                    })
                formats.extend(f4m_formats)
                continue
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_manifest_url, video_id, 'mp4', preference=preference,
                    m3u8_id=m3u8_id, fatal=fatal))
                continue
        formats.append({
            'format_id': format_id,
            'url': media_manifest_url,
            'manifest_url': media_manifest_url,
            'ext': 'flv' if bootstrap_info is not None else None,
            'protocol': 'f4m',
            'tbr': tbr,
            'width': width,
            'height': height,
            'vcodec': vcodec,
            'preference': preference,
        })
    return formats
-
- def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
- return {
- 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
- 'url': m3u8_url,
- 'ext': ext,
- 'protocol': 'm3u8',
- 'preference': preference - 100 if preference else -100,
- 'resolution': 'multiple',
- 'format_note': 'Quality selection URL',
- }
-
- def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None,
- m3u8_id=None, note=None, errnote=None,
- fatal=True, live=False):
- res = self._download_webpage_handle(
- m3u8_url, video_id,
- note=note or 'Downloading m3u8 information',
- errnote=errnote or 'Failed to download m3u8 information',
- fatal=fatal)
-
- if res is False:
- return []
-
- m3u8_doc, urlh = res
- m3u8_url = urlh.geturl()
-
- return self._parse_m3u8_formats(
- m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
- preference=preference, m3u8_id=m3u8_id, live=live)
-
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
                        entry_protocol='m3u8', preference=None,
                        m3u8_id=None, live=False):
    """Build the list of formats described by an m3u8 playlist.

    m3u8_doc is the playlist text and m3u8_url the URL it was fetched from
    (used to resolve relative URIs). A media playlist yields a single
    format pointing at m3u8_url; a master playlist yields one format per
    variant stream plus one per VIDEO/AUDIO rendition that has a URI.
    Playlists carrying the Adobe Flash Access or Apple FairPlay markers
    checked below yield an empty list.
    """
    if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
        return []

    if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
        return []

    formats = []

    # Absolute http(s) URLs are kept as is, anything else is resolved
    # against the playlist URL
    format_url = lambda u: (
        u
        if re.match(r'^https?://', u)
        else compat_urlparse.urljoin(m3u8_url, u))

    # References:
    # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
    # 2. https://github.com/rg3/youtube-dl/issues/12211

    # We should try extracting formats only from master playlists [1, 4.3.4],
    # i.e. playlists that describe available qualities. On the other hand
    # media playlists [1, 4.3.3] should be returned as is since they contain
    # just the media without qualities renditions.
    # Fortunately, master playlist can be easily distinguished from media
    # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
    # master playlist tags MUST NOT appear in a media playlist and vice versa.
    # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
    # media playlist and MUST NOT appear in master playlist thus we can
    # clearly detect media playlist with this criterion.

    if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
        return [{
            'url': m3u8_url,
            'format_id': m3u8_id,
            'ext': ext,
            'protocol': entry_protocol,
            'preference': preference,
        }]

    # GROUP-ID -> list of EXT-X-MEDIA attribute dicts, in playlist order
    groups = {}
    # Attributes of the most recent EXT-X-STREAM-INF tag; reset once the
    # URI line that follows it has been consumed
    last_stream_inf = {}

    def extract_media(x_media_line):
        # Record the rendition in its group and, for VIDEO/AUDIO renditions
        # that have their own URI, add it as a format.
        media = parse_m3u8_attributes(x_media_line)
        # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
        media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
        if not (media_type and group_id and name):
            return
        groups.setdefault(group_id, []).append(media)
        if media_type not in ('VIDEO', 'AUDIO'):
            return
        media_url = media.get('URI')
        if media_url:
            format_id = []
            for v in (m3u8_id, group_id, name):
                if v:
                    format_id.append(v)
            f = {
                'format_id': '-'.join(format_id),
                'url': format_url(media_url),
                'manifest_url': m3u8_url,
                'language': media.get('LANGUAGE'),
                'ext': ext,
                'protocol': entry_protocol,
                'preference': preference,
            }
            if media_type == 'AUDIO':
                f['vcodec'] = 'none'
            formats.append(f)

    def build_stream_name():
        # Despite specification does not mention NAME attribute for
        # EXT-X-STREAM-INF tag it still sometimes may be present (see [1]
        # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
        # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
        stream_name = last_stream_inf.get('NAME')
        if stream_name:
            return stream_name
        # If there is no NAME in EXT-X-STREAM-INF it will be obtained
        # from corresponding rendition group
        stream_group_id = last_stream_inf.get('VIDEO')
        if not stream_group_id:
            return
        stream_group = groups.get(stream_group_id)
        if not stream_group:
            return stream_group_id
        rendition = stream_group[0]
        return rendition.get('NAME') or stream_group_id

    for line in m3u8_doc.splitlines():
        if line.startswith('#EXT-X-STREAM-INF:'):
            last_stream_inf = parse_m3u8_attributes(line)
        elif line.startswith('#EXT-X-MEDIA:'):
            extract_media(line)
        elif line.startswith('#') or not line.strip():
            continue
        else:
            # Any other non-empty line is a URI line; it is described by
            # the attributes of the preceding EXT-X-STREAM-INF tag
            tbr = float_or_none(
                last_stream_inf.get('AVERAGE-BANDWIDTH') or
                last_stream_inf.get('BANDWIDTH'), scale=1000)
            format_id = []
            if m3u8_id:
                format_id.append(m3u8_id)
            stream_name = build_stream_name()
            # Bandwidth of live streams may differ over time thus making
            # format_id unpredictable. So it's better to keep provided
            # format_id intact.
            if not live:
                format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
            manifest_url = format_url(line.strip())
            f = {
                'format_id': '-'.join(format_id),
                'url': manifest_url,
                'manifest_url': m3u8_url,
                'tbr': tbr,
                'ext': ext,
                'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
                'protocol': entry_protocol,
                'preference': preference,
            }
            resolution = last_stream_inf.get('RESOLUTION')
            if resolution:
                mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
                if mobj:
                    f['width'] = int(mobj.group('width'))
                    f['height'] = int(mobj.group('height'))
            # Unified Streaming Platform
            # (audio and, optionally, video bitrates are read from the URL)
            mobj = re.search(
                r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
            if mobj:
                abr, vbr = mobj.groups()
                abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
                f.update({
                    'vbr': vbr,
                    'abr': abr,
                })
            codecs = parse_codecs(last_stream_inf.get('CODECS'))
            f.update(codecs)
            audio_group_id = last_stream_inf.get('AUDIO')
            # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
            # references a rendition group MUST have a CODECS attribute.
            # However, this is not always respected, for example, [2]
            # contains EXT-X-STREAM-INF tag which references AUDIO
            # rendition group but does not have CODECS and, despite
            # referencing an audio group, it represents
            # a complete (with audio and video) format. So, for such cases
            # we will ignore references to rendition groups and treat them
            # as complete formats.
            if audio_group_id and codecs and f.get('vcodec') != 'none':
                audio_group = groups.get(audio_group_id)
                if audio_group and audio_group[0].get('URI'):
                    # TODO: update acodec for audio only formats with
                    # the same GROUP-ID
                    f['acodec'] = 'none'
            formats.append(f)
            last_stream_inf = {}
    return formats
-
- @staticmethod
- def _xpath_ns(path, namespace=None):
- if not namespace:
- return path
- out = []
- for c in path.split('/'):
- if not c or c == '.':
- out.append(c)
- else:
- out.append('{%s}%s' % (namespace, c))
- return '/'.join(out)
-
- def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
-
- if smil is False:
- assert not fatal
- return []
-
- namespace = self._parse_smil_namespace(smil)
-
- return self._parse_smil_formats(
- smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-
- def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal)
- if smil is False:
- return {}
- return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
-
- def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
- return self._download_xml(
- smil_url, video_id, 'Downloading SMIL file',
- 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
-
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
    """Build an info dict from a parsed SMIL document.

    Formats and subtitles come from _parse_smil_formats and
    _parse_smil_subtitles. The returned id is derived from the basename
    of smil_url (without extension), not from the video_id argument.
    Title, description and upload date are read from the ./head/meta
    elements; the first non-empty value of each wins. The title falls
    back to the id.
    """
    namespace = self._parse_smil_namespace(smil)

    formats = self._parse_smil_formats(
        smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
    subtitles = self._parse_smil_subtitles(smil, namespace=namespace)

    video_id = os.path.splitext(url_basename(smil_url))[0]

    title = description = upload_date = None
    for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
        meta_name = meta.attrib.get('name')
        meta_content = meta.attrib.get('content')
        if not (meta_name and meta_content):
            continue
        if meta_name == 'title' and not title:
            title = meta_content
        elif meta_name in ('description', 'abstract') and not description:
            description = meta_content
        elif meta_name == 'date' and not upload_date:
            upload_date = unified_strdate(meta_content)

    thumbnails = []
    for image in smil.findall(self._xpath_ns('.//image', namespace)):
        # Images without a source are of no use as thumbnails
        if not image.get('src'):
            continue
        thumbnails.append({
            'id': image.get('type'),
            'url': image.get('src'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        })

    return {
        'id': video_id,
        'title': title or video_id,
        'description': description,
        'upload_date': upload_date,
        'thumbnails': thumbnails,
        'formats': formats,
        'subtitles': subtitles,
    }
-
- def _parse_smil_namespace(self, smil):
- return self._search_regex(
- r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
-
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- base = smil_url
- for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
- b = meta.get('base') or meta.get('httpBase')
- if b:
- base = b
- break
-
- formats = []
- rtmp_count = 0
- http_count = 0
- m3u8_count = 0
-
- srcs = []
- media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
- for medium in media:
- src = medium.get('src')
- if not src or src in srcs:
- continue
- srcs.append(src)
-
- bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
- filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
- width = int_or_none(medium.get('width'))
- height = int_or_none(medium.get('height'))
- proto = medium.get('proto')
- ext = medium.get('ext')
- src_ext = determine_ext(src)
- streamer = medium.get('streamer') or base
-
- if proto == 'rtmp' or streamer.startswith('rtmp'):
- rtmp_count += 1
- formats.append({
- 'url': streamer,
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'filesize': filesize,
- 'width': width,
- 'height': height,
- })
- if transform_rtmp_url:
- streamer, src = transform_rtmp_url(streamer, src)
- formats[-1].update({
- 'url': streamer,
- 'play_path': src,
- })
- continue
-
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
- src_url = src_url.strip()
-
- if proto == 'm3u8' or src_ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
- if len(m3u8_formats) == 1:
- m3u8_count += 1
- m3u8_formats[0].update({
- 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'width': width,
- 'height': height,
- })
- formats.extend(m3u8_formats)
- continue
-
- if src_ext == 'f4m':
- f4m_url = src_url
- if not f4m_params:
- f4m_params = {
- 'hdcore': '3.2.0',
- 'plugin': 'flowplayer-3.2.0.1',
- }
- f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
- formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
- continue
-
- if src_url.startswith('http') and self._is_valid_url(src, video_id):
- http_count += 1
- formats.append({
- 'url': src_url,
- 'ext': ext or src_ext or 'flv',
- 'format_id': 'http-%d' % (bitrate or http_count),
- 'tbr': bitrate,
- 'filesize': filesize,
- 'width': width,
- 'height': height,
- })
- continue
-
- return formats
-
- def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
- urls = []
- subtitles = {}
- for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
- src = textstream.get('src')
- if not src or src in urls:
- continue
- urls.append(src)
- ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
- lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
- subtitles.setdefault(lang, []).append({
- 'url': src,
- 'ext': ext,
- })
- return subtitles
-
- def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
- xspf = self._download_xml(
- xspf_url, playlist_id, 'Downloading xpsf playlist',
- 'Unable to download xspf manifest', fatal=fatal)
- if xspf is False:
- return []
- return self._parse_xspf(
- xspf, playlist_id, xspf_url=xspf_url,
- xspf_base_url=base_url(xspf_url))
-
- def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
- NS_MAP = {
- 'xspf': 'http://xspf.org/ns/0/',
- 's1': 'http://static.streamone.nl/player/ns/0',
- }
-
- entries = []
- for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
- title = xpath_text(
- track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
- description = xpath_text(
- track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
- thumbnail = xpath_text(
- track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
- duration = float_or_none(
- xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
-
- formats = []
- for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
- format_url = urljoin(xspf_base_url, location.text)
- if not format_url:
- continue
- formats.append({
- 'url': format_url,
- 'manifest_url': xspf_url,
- 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
- 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
- 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
- })
- self._sort_formats(formats)
-
- entries.append({
- 'id': playlist_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- })
- return entries
-
- def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
- res = self._download_xml_handle(
- mpd_url, video_id,
- note=note or 'Downloading MPD manifest',
- errnote=errnote or 'Failed to download MPD manifest',
- fatal=fatal)
- if res is False:
- return []
- mpd_doc, urlh = res
- mpd_base_url = base_url(urlh.geturl())
-
- return self._parse_mpd_formats(
- mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
- formats_dict=formats_dict, mpd_url=mpd_url)
-
- def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
- """
- Parse formats from MPD manifest.
- References:
- 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
- http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
- 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
- """
- if mpd_doc.get('type') == 'dynamic':
- return []
-
- namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
-
- def _add_ns(path):
- return self._xpath_ns(path, namespace)
-
- def is_drm_protected(element):
- return element.find(_add_ns('ContentProtection')) is not None
-
- def extract_multisegment_info(element, ms_parent_info):
- ms_info = ms_parent_info.copy()
-
- # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
- # common attributes and elements. We will only extract relevant
- # for us.
- def extract_common(source):
- segment_timeline = source.find(_add_ns('SegmentTimeline'))
- if segment_timeline is not None:
- s_e = segment_timeline.findall(_add_ns('S'))
- if s_e:
- ms_info['total_number'] = 0
- ms_info['s'] = []
- for s in s_e:
- r = int(s.get('r', 0))
- ms_info['total_number'] += 1 + r
- ms_info['s'].append({
- 't': int(s.get('t', 0)),
- # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
- 'd': int(s.attrib['d']),
- 'r': r,
- })
- start_number = source.get('startNumber')
- if start_number:
- ms_info['start_number'] = int(start_number)
- timescale = source.get('timescale')
- if timescale:
- ms_info['timescale'] = int(timescale)
- segment_duration = source.get('duration')
- if segment_duration:
- ms_info['segment_duration'] = float(segment_duration)
-
- def extract_Initialization(source):
- initialization = source.find(_add_ns('Initialization'))
- if initialization is not None:
- ms_info['initialization_url'] = initialization.attrib['sourceURL']
-
- segment_list = element.find(_add_ns('SegmentList'))
- if segment_list is not None:
- extract_common(segment_list)
- extract_Initialization(segment_list)
- segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
- if segment_urls_e:
- ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
- else:
- segment_template = element.find(_add_ns('SegmentTemplate'))
- if segment_template is not None:
- extract_common(segment_template)
- media = segment_template.get('media')
- if media:
- ms_info['media'] = media
- initialization = segment_template.get('initialization')
- if initialization:
- ms_info['initialization'] = initialization
- else:
- extract_Initialization(segment_template)
- return ms_info
-
- mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
- formats = []
- for period in mpd_doc.findall(_add_ns('Period')):
- period_duration = parse_duration(period.get('duration')) or mpd_duration
- period_ms_info = extract_multisegment_info(period, {
- 'start_number': 1,
- 'timescale': 1,
- })
- for adaptation_set in period.findall(_add_ns('AdaptationSet')):
- if is_drm_protected(adaptation_set):
- continue
- adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
- for representation in adaptation_set.findall(_add_ns('Representation')):
- if is_drm_protected(representation):
- continue
- representation_attrib = adaptation_set.attrib.copy()
- representation_attrib.update(representation.attrib)
- # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
- mime_type = representation_attrib['mimeType']
- content_type = mime_type.split('/')[0]
- if content_type == 'text':
- # TODO implement WebVTT downloading
- pass
- elif content_type in ('video', 'audio'):
- base_url = ''
- for element in (representation, adaptation_set, period, mpd_doc):
- base_url_e = element.find(_add_ns('BaseURL'))
- if base_url_e is not None:
- base_url = base_url_e.text + base_url
- if re.match(r'^https?://', base_url):
- break
- if mpd_base_url and not re.match(r'^https?://', base_url):
- if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
- mpd_base_url += '/'
- base_url = mpd_base_url + base_url
- representation_id = representation_attrib.get('id')
- lang = representation_attrib.get('lang')
- url_el = representation.find(_add_ns('BaseURL'))
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
- bandwidth = int_or_none(representation_attrib.get('bandwidth'))
- f = {
- 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
- 'url': base_url,
- 'manifest_url': mpd_url,
- 'ext': mimetype2ext(mime_type),
- 'width': int_or_none(representation_attrib.get('width')),
- 'height': int_or_none(representation_attrib.get('height')),
- 'tbr': float_or_none(bandwidth, 1000),
- 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
- 'fps': int_or_none(representation_attrib.get('frameRate')),
- 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
- 'format_note': 'DASH %s' % content_type,
- 'filesize': filesize,
- 'container': mimetype2ext(mime_type) + '_dash',
- }
- f.update(parse_codecs(representation_attrib.get('codecs')))
- representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-
- def prepare_template(template_name, identifiers):
- tmpl = representation_ms_info[template_name]
- # First of, % characters outside $...$ templates
- # must be escaped by doubling for proper processing
- # by % operator string formatting used further (see
- # https://github.com/rg3/youtube-dl/issues/16867).
- t = ''
- in_template = False
- for c in tmpl:
- t += c
- if c == '$':
- in_template = not in_template
- elif c == '%' and not in_template:
- t += c
- # Next, $...$ templates are translated to their
- # %(...) counterparts to be used with % operator
- t = t.replace('$RepresentationID$', representation_id)
- t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
- t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
- t.replace('$$', '$')
- return t
-
- # @initialization is a regular template like @media one
- # so it should be handled just the same way (see
- # https://github.com/rg3/youtube-dl/issues/11605)
- if 'initialization' in representation_ms_info:
- initialization_template = prepare_template(
- 'initialization',
- # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
- # $Time$ shall not be included for @initialization thus
- # only $Bandwidth$ remains
- ('Bandwidth', ))
- representation_ms_info['initialization_url'] = initialization_template % {
- 'Bandwidth': bandwidth,
- }
-
- def location_key(location):
- return 'url' if re.match(r'^https?://', location) else 'path'
-
- if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
-
- media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
- media_location_key = location_key(media_template)
-
- # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
- # can't be used at the same time
- if '%(Number' in media_template and 's' not in representation_ms_info:
- segment_duration = None
- if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
- segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
- representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
- representation_ms_info['fragments'] = [{
- media_location_key: media_template % {
- 'Number': segment_number,
- 'Bandwidth': bandwidth,
- },
- 'duration': segment_duration,
- } for segment_number in range(
- representation_ms_info['start_number'],
- representation_ms_info['total_number'] + representation_ms_info['start_number'])]
- else:
- # $Number*$ or $Time$ in media template with S list available
- # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
- # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
- representation_ms_info['fragments'] = []
- segment_time = 0
- segment_d = None
- segment_number = representation_ms_info['start_number']
-
- def add_segment_url():
- segment_url = media_template % {
- 'Time': segment_time,
- 'Bandwidth': bandwidth,
- 'Number': segment_number,
- }
- representation_ms_info['fragments'].append({
- media_location_key: segment_url,
- 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
- })
-
- for num, s in enumerate(representation_ms_info['s']):
- segment_time = s.get('t') or segment_time
- segment_d = s['d']
- add_segment_url()
- segment_number += 1
- for r in range(s.get('r', 0)):
- segment_time += segment_d
- add_segment_url()
- segment_number += 1
- segment_time += segment_d
- elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
- # No media template
- # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
- # or any YouTube dashsegments video
- fragments = []
- segment_index = 0
- timescale = representation_ms_info['timescale']
- for s in representation_ms_info['s']:
- duration = float_or_none(s['d'], timescale)
- for r in range(s.get('r', 0) + 1):
- segment_uri = representation_ms_info['segment_urls'][segment_index]
- fragments.append({
- location_key(segment_uri): segment_uri,
- 'duration': duration,
- })
- segment_index += 1
- representation_ms_info['fragments'] = fragments
- elif 'segment_urls' in representation_ms_info:
- # Segment URLs with no SegmentTimeline
- # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
- # https://github.com/rg3/youtube-dl/pull/14844
- fragments = []
- segment_duration = float_or_none(
- representation_ms_info['segment_duration'],
- representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
- for segment_url in representation_ms_info['segment_urls']:
- fragment = {
- location_key(segment_url): segment_url,
- }
- if segment_duration:
- fragment['duration'] = segment_duration
- fragments.append(fragment)
- representation_ms_info['fragments'] = fragments
- # NB: MPD manifest may contain direct URLs to unfragmented media.
- # No fragments key is present in this case.
- if 'fragments' in representation_ms_info:
- f.update({
- 'fragment_base_url': base_url,
- 'fragments': [],
- 'protocol': 'http_dash_segments',
- })
- if 'initialization_url' in representation_ms_info:
- initialization_url = representation_ms_info['initialization_url']
- if not f.get('url'):
- f['url'] = initialization_url
- f['fragments'].append({location_key(initialization_url): initialization_url})
- f['fragments'].extend(representation_ms_info['fragments'])
- # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
- # is not necessarily unique within a Period thus formats with
- # the same `format_id` are quite possible. There are numerous examples
- # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
- # https://github.com/rg3/youtube-dl/issues/13919)
- full_info = formats_dict.get(representation_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
- return formats
-
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
- res = self._download_xml_handle(
- ism_url, video_id,
- note=note or 'Downloading ISM manifest',
- errnote=errnote or 'Failed to download ISM manifest',
- fatal=fatal)
- if res is False:
- return []
- ism_doc, urlh = res
-
- return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
-
- def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
- """
- Parse formats from ISM manifest.
- References:
- 1. [MS-SSTR]: Smooth Streaming Protocol,
- https://msdn.microsoft.com/en-us/library/ff469518.aspx
- """
- if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
- return []
-
- duration = int(ism_doc.attrib['Duration'])
- timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
-
- formats = []
- for stream in ism_doc.findall('StreamIndex'):
- stream_type = stream.get('Type')
- if stream_type not in ('video', 'audio'):
- continue
- url_pattern = stream.attrib['Url']
- stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
- stream_name = stream.get('Name')
- for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
- # TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL'):
- self.report_warning('%s is not a supported codec' % fourcc)
- continue
- tbr = int(track.attrib['Bitrate']) // 1000
- # [1] does not mention Width and Height attributes. However,
- # they're often present while MaxWidth and MaxHeight are
- # missing, so should be used as fallbacks
- width = int_or_none(track.get('MaxWidth') or track.get('Width'))
- height = int_or_none(track.get('MaxHeight') or track.get('Height'))
- sampling_rate = int_or_none(track.get('SamplingRate'))
-
- track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
-
- fragments = []
- fragment_ctx = {
- 'time': 0,
- }
- stream_fragments = stream.findall('c')
- for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
- fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
- fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
- fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
- if not fragment_ctx['duration']:
- try:
- next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
- except IndexError:
- next_fragment_time = duration
- fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
- for _ in range(fragment_repeat):
- fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
- 'duration': fragment_ctx['duration'] / stream_timescale,
- })
- fragment_ctx['time'] += fragment_ctx['duration']
-
- format_id = []
- if ism_id:
- format_id.append(ism_id)
- if stream_name:
- format_id.append(stream_name)
- format_id.append(compat_str(tbr))
-
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': ism_url,
- 'manifest_url': ism_url,
- 'ext': 'ismv' if stream_type == 'video' else 'isma',
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'asr': sampling_rate,
- 'vcodec': 'none' if stream_type == 'audio' else fourcc,
- 'acodec': 'none' if stream_type == 'video' else fourcc,
- 'protocol': 'ism',
- 'fragments': fragments,
- '_download_params': {
- 'duration': duration,
- 'timescale': stream_timescale,
- 'width': width or 0,
- 'height': height or 0,
- 'fourcc': fourcc,
- 'codec_private_data': track.get('CodecPrivateData'),
- 'sampling_rate': sampling_rate,
- 'channels': int_or_none(track.get('Channels', 2)),
- 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
- 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
- },
- })
- return formats
-
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
- def absolute_url(item_url):
- return urljoin(base_url, item_url)
-
- def parse_content_type(content_type):
- if not content_type:
- return {}
- ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
- if ctr:
- mimetype, codecs = ctr.groups()
- f = parse_codecs(codecs)
- f['ext'] = mimetype2ext(mimetype)
- return f
- return {}
-
- def _media_formats(src, cur_media_type, type_info={}):
- full_url = absolute_url(src)
- ext = type_info.get('ext') or determine_ext(full_url)
- if ext == 'm3u8':
- is_plain_url = False
- formats = self._extract_m3u8_formats(
- full_url, video_id, ext='mp4',
- entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference, fatal=False)
- elif ext == 'mpd':
- is_plain_url = False
- formats = self._extract_mpd_formats(
- full_url, video_id, mpd_id=mpd_id, fatal=False)
- else:
- is_plain_url = True
- formats = [{
- 'url': full_url,
- 'vcodec': 'none' if cur_media_type == 'audio' else None,
- }]
- return is_plain_url, formats
-
- entries = []
- # amp-video and amp-audio are very similar to their HTML5 counterparts
- # so we wll include them right here (see
- # https://www.ampproject.org/docs/reference/components/amp-video)
- media_tags = [(media_tag, media_type, '')
- for media_tag, media_type
- in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
- media_tags.extend(re.findall(
- # We only allow video|audio followed by a whitespace or '>'.
- # Allowing more characters may end up in significant slow down (see
- # https://github.com/rg3/youtube-dl/issues/11979, example URL:
- # http://www.porntrex.com/maps/videositemap.xml).
- r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
- for media_tag, media_type, media_content in media_tags:
- media_info = {
- 'formats': [],
- 'subtitles': {},
- }
- media_attributes = extract_attributes(media_tag)
- src = media_attributes.get('src')
- if src:
- _, formats = _media_formats(src, media_type)
- media_info['formats'].extend(formats)
- media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
- if media_content:
- for source_tag in re.findall(r'<source[^>]+>', media_content):
- source_attributes = extract_attributes(source_tag)
- src = source_attributes.get('src')
- if not src:
- continue
- f = parse_content_type(source_attributes.get('type'))
- is_plain_url, formats = _media_formats(src, media_type, f)
- if is_plain_url:
- # res attribute is not standard but seen several times
- # in the wild
- f.update({
- 'height': int_or_none(source_attributes.get('res')),
- 'format_id': source_attributes.get('label'),
- })
- f.update(formats[0])
- media_info['formats'].append(f)
- else:
- media_info['formats'].extend(formats)
- for track_tag in re.findall(r'<track[^>]+>', media_content):
- track_attributes = extract_attributes(track_tag)
- kind = track_attributes.get('kind')
- if not kind or kind in ('subtitles', 'captions'):
- src = track_attributes.get('src')
- if not src:
- continue
- lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
- media_info['subtitles'].setdefault(lang, []).append({
- 'url': absolute_url(src),
- })
- for f in media_info['formats']:
- f.setdefault('http_headers', {})['Referer'] = base_url
- if media_info['formats'] or media_info['subtitles']:
- entries.append(media_info)
- return entries
-
- def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
- formats = []
- hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
- hds_host = hosts.get('hds')
- if hds_host:
- f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
- if 'hdcore=' not in f4m_url:
- f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
- f4m_formats = self._extract_f4m_formats(
- f4m_url, video_id, f4m_id='hds', fatal=False)
- for entry in f4m_formats:
- entry.update({'extra_param_to_segment_url': hdcore_sign})
- formats.extend(f4m_formats)
- m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
- hls_host = hosts.get('hls')
- if hls_host:
- m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- return formats
-
- def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
- url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
- mobj = re.search(
- r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
- url_base = mobj.group('url')
- http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
- formats = []
-
- def manifest_url(manifest):
- m_url = '%s/%s' % (http_base_url, manifest)
- if query:
- m_url += '?%s' % query
- return m_url
-
- if 'm3u8' not in skip_protocols:
- formats.extend(self._extract_m3u8_formats(
- manifest_url('playlist.m3u8'), video_id, 'mp4',
- m3u8_entry_protocol, m3u8_id='hls', fatal=False))
- if 'f4m' not in skip_protocols:
- formats.extend(self._extract_f4m_formats(
- manifest_url('manifest.f4m'),
- video_id, f4m_id='hds', fatal=False))
- if 'dash' not in skip_protocols:
- formats.extend(self._extract_mpd_formats(
- manifest_url('manifest.mpd'),
- video_id, mpd_id='dash', fatal=False))
- if re.search(r'(?:/smil:|\.smil)', url_base):
- if 'smil' not in skip_protocols:
- rtmp_formats = self._extract_smil_formats(
- manifest_url('jwplayer.smil'),
- video_id, fatal=False)
- for rtmp_format in rtmp_formats:
- rtsp_format = rtmp_format.copy()
- rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
- del rtsp_format['play_path']
- del rtsp_format['ext']
- rtsp_format.update({
- 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
- 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
- 'protocol': 'rtsp',
- })
- formats.extend([rtmp_format, rtsp_format])
- else:
- for protocol in ('rtmp', 'rtsp'):
- if protocol not in skip_protocols:
- formats.append({
- 'url': '%s:%s' % (protocol, url_base),
- 'format_id': protocol,
- 'protocol': protocol,
- })
- return formats
-
- def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
- mobj = re.search(
- r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
- webpage)
- if mobj:
- try:
- jwplayer_data = self._parse_json(mobj.group('options'),
- video_id=video_id,
- transform_source=transform_source)
- except ExtractorError:
- pass
- else:
- if isinstance(jwplayer_data, dict):
- return jwplayer_data
-
- def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
- jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
- return self._parse_jwplayer_data(
- jwplayer_data, video_id, *args, **kwargs)
-
- def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
- m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
- # JWPlayer backward compatibility: flattened playlists
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
- if 'playlist' not in jwplayer_data:
- jwplayer_data = {'playlist': [jwplayer_data]}
-
- entries = []
-
- # JWPlayer backward compatibility: single playlist item
- # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
- if not isinstance(jwplayer_data['playlist'], list):
- jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
- for video_data in jwplayer_data['playlist']:
- # JWPlayer backward compatibility: flattened sources
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
- if 'sources' not in video_data:
- video_data['sources'] = [video_data]
-
- this_video_id = video_id or video_data['mediaid']
-
- formats = self._parse_jwplayer_formats(
- video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
- mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
-
- subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
- continue
- if track_kind.lower() not in ('captions', 'subtitles'):
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
-
- entry = {
- 'id': this_video_id,
- 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
- 'description': video_data.get('description'),
- 'thumbnail': self._proto_relative_url(video_data.get('image')),
- 'timestamp': int_or_none(video_data.get('pubdate')),
- 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
- 'subtitles': subtitles,
- }
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
- if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
- entry.update({
- '_type': 'url_transparent',
- 'url': formats[0]['url'],
- })
- else:
- self._sort_formats(formats)
- entry['formats'] = formats
- entries.append(entry)
- if len(entries) == 1:
- return entries[0]
- else:
- return self.playlist_result(entries)
-
- def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
- m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
- urls = []
- formats = []
- for source in jwplayer_sources_data:
- if not isinstance(source, dict):
- continue
- source_url = self._proto_relative_url(source.get('file'))
- if not source_url:
- continue
- if base_url:
- source_url = compat_urlparse.urljoin(base_url, source_url)
- if source_url in urls:
- continue
- urls.append(source_url)
- source_type = source.get('type') or ''
- ext = mimetype2ext(source_type) or determine_ext(source_url)
- if source_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=m3u8_id, fatal=False))
- elif source_type == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- source_url, video_id, mpd_id=mpd_id, fatal=False))
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- source_url, video_id, fatal=False))
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
- elif source_type.startswith('audio') or ext in (
- 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
- formats.append({
- 'url': source_url,
- 'vcodec': 'none',
- 'ext': ext,
- })
- else:
- height = int_or_none(source.get('height'))
- if height is None:
- # Often no height is provided but there is a label in
- # format like "1080p", "720p SD", or 1080.
- height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
- 'height', default=None))
- a_format = {
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'height': height,
- 'tbr': int_or_none(source.get('bitrate')),
- 'ext': ext,
- }
- if source_url.startswith('rtmp'):
- a_format['ext'] = 'flv'
- # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
- # of jwplayer.flash.swf
- rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
- if len(rtmp_url_parts) == 3:
- rtmp_url, prefix, play_path = rtmp_url_parts
- a_format.update({
- 'url': rtmp_url,
- 'play_path': prefix + play_path,
- })
- if rtmp_params:
- a_format.update(rtmp_params)
- formats.append(a_format)
- return formats
-
- def _live_title(self, name):
- """ Generate the title for a live video """
- now = datetime.datetime.now()
- now_str = now.strftime('%Y-%m-%d %H:%M')
- return name + ' ' + now_str
-
- def _int(self, v, name, fatal=False, **kwargs):
- res = int_or_none(v, **kwargs)
- if 'get_attr' in kwargs:
- print(getattr(v, kwargs['get_attr']))
- if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
- if fatal:
- raise ExtractorError(msg)
- else:
- self._downloader.report_warning(msg)
- return res
-
- def _float(self, v, name, fatal=False, **kwargs):
- res = float_or_none(v, **kwargs)
- if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
- if fatal:
- raise ExtractorError(msg)
- else:
- self._downloader.report_warning(msg)
- return res
-
- def _set_cookie(self, domain, name, value, expire_time=None, port=None,
- path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar.Cookie(
- 0, name, value, port, port is not None, domain, True,
- domain.startswith('.'), path, True, secure, expire_time,
- discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
-
- def _get_cookies(self, url):
- """ Return a compat_cookies.SimpleCookie with the cookies for the url """
- req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies.SimpleCookie(req.get_header('Cookie'))
-
- def get_testcases(self, include_onlymatching=False):
- t = getattr(self, '_TEST', None)
- if t:
- assert not hasattr(self, '_TESTS'), \
- '%s has _TEST and _TESTS' % type(self).__name__
- tests = [t]
- else:
- tests = getattr(self, '_TESTS', [])
- for t in tests:
- if not include_onlymatching and t.get('only_matching', False):
- continue
- t['name'] = type(self).__name__[:-len('IE')]
- yield t
-
- def is_suitable(self, age_limit):
- """ Test whether the extractor is generally suitable for the given
- age limit (i.e. pornographic sites are not, all others usually are) """
-
- any_restricted = False
- for tc in self.get_testcases(include_onlymatching=False):
- if tc.get('playlist', []):
- tc = tc['playlist'][0]
- is_restricted = age_restricted(
- tc.get('info_dict', {}).get('age_limit'), age_limit)
- if not is_restricted:
- return True
- any_restricted = any_restricted or is_restricted
- return not any_restricted
-
- def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False) or
- self._downloader.params.get('listsubtitles')):
- return self._get_subtitles(*args, **kwargs)
- return {}
-
- def _get_subtitles(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- @staticmethod
- def _merge_subtitle_items(subtitle_list1, subtitle_list2):
- """ Merge subtitle items for one language. Items with duplicated URLs
- will be dropped. """
- list1_urls = set([item['url'] for item in subtitle_list1])
- ret = list(subtitle_list1)
- ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
- return ret
-
- @classmethod
- def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
- """ Merge two subtitle dictionaries, language by language. """
- ret = dict(subtitle_dict1)
- for lang in subtitle_dict2:
- ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
- return ret
-
- def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False) or
- self._downloader.params.get('listsubtitles')):
- return self._get_automatic_captions(*args, **kwargs)
- return {}
-
- def _get_automatic_captions(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False) and
- (self._get_login_info()[0] is not None or
- self._downloader.params.get('cookiefile') is not None)):
- self._mark_watched(*args, **kwargs)
-
- def _mark_watched(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def geo_verification_headers(self):
- headers = {}
- geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
- if geo_verification_proxy:
- headers['Ytdl-request-proxy'] = geo_verification_proxy
- return headers
-
- def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
-
- def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
-
-
-class SearchInfoExtractor(InfoExtractor):
- """
- Base class for paged search queries extractors.
- They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
- Instances should define _SEARCH_KEY and _MAX_RESULTS.
- """
-
- @classmethod
- def _make_valid_url(cls):
- return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
-
- @classmethod
- def suitable(cls, url):
- return re.match(cls._make_valid_url(), url) is not None
-
- def _real_extract(self, query):
- mobj = re.match(self._make_valid_url(), query)
- if mobj is None:
- raise ExtractorError('Invalid search query "%s"' % query)
-
- prefix = mobj.group('prefix')
- query = mobj.group('query')
- if prefix == '':
- return self._get_n_results(query, 1)
- elif prefix == 'all':
- return self._get_n_results(query, self._MAX_RESULTS)
- else:
- n = int(prefix)
- if n <= 0:
- raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
- elif n > self._MAX_RESULTS:
- self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
- n = self._MAX_RESULTS
- return self._get_n_results(query, n)
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
- raise NotImplementedError('This method must be implemented by subclasses')
-
- @property
- def SEARCH_KEY(self):
- return self._SEARCH_KEY
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
deleted file mode 100644
index 79f7a9c..0000000
--- a/youtube_dl/extractor/commonmistakes.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import unicode_literals
-
-import sys
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class CommonMistakesIE(InfoExtractor):
- IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:url|URL)$
- '''
-
- _TESTS = [{
- 'url': 'url',
- 'only_matching': True,
- }, {
- 'url': 'URL',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- msg = (
- 'You\'ve asked youtube-dl to download the URL "%s". '
- 'That doesn\'t make any sense. '
- 'Simply remove the parameter in your command or configuration.'
- ) % url
- if not self._downloader.params.get('verbose'):
- msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
- raise ExtractorError(msg, expected=True)
-
-
-class UnicodeBOMIE(InfoExtractor):
- IE_DESC = False
- _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
-
- # Disable test for python 3.2 since BOM is broken in re in this version
- # (see https://github.com/rg3/youtube-dl/issues/9751)
- _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
- 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- real_url = self._match_id(url)
- self.report_warning(
- 'Your URL starts with a Byte Order Mark (BOM). '
- 'Removing the BOM and looking for "%s" ...' % real_url)
- return self.url_result(real_url)
diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py
deleted file mode 100644
index d98331a..0000000
--- a/youtube_dl/extractor/commonprotocols.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
-)
-
-
-class RtmpIE(InfoExtractor):
- IE_DESC = False # Do not list
- _VALID_URL = r'(?i)rtmp[est]?://.+'
-
- _TESTS = [{
- 'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
- 'only_matching': True,
- }, {
- 'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._generic_id(url)
- title = self._generic_title(url)
- return {
- 'id': video_id,
- 'title': title,
- 'formats': [{
- 'url': url,
- 'ext': 'flv',
- 'format_id': compat_urlparse.urlparse(url).scheme,
- }],
- }
-
-
-class MmsIE(InfoExtractor):
- IE_DESC = False # Do not list
- _VALID_URL = r'(?i)mms://.+'
-
- _TEST = {
- # Direct MMS link
- 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
- 'info_dict': {
- 'id': 'MilesReid(0709)',
- 'ext': 'wmv',
- 'title': 'MilesReid(0709)',
- },
- 'params': {
- 'skip_download': True, # rtsp downloads, requiring mplayer or mpv
- },
- }
-
- def _real_extract(self, url):
- video_id = self._generic_id(url)
- title = self._generic_title(url)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': url,
- }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
deleted file mode 100644
index ceb72da..0000000
--- a/youtube_dl/extractor/extractors.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# flake8: noqa
-from __future__ import unicode_literals
-
-
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import (
- MmsIE,
- RtmpIE,
-)
-
-from .openload import OpenloadIE
-
-from .youtube import (
- YoutubeIE,
- YoutubeChannelIE,
- YoutubeFavouritesIE,
- YoutubeHistoryIE,
- YoutubeLiveIE,
- YoutubePlaylistIE,
- YoutubePlaylistsIE,
- YoutubeRecommendedIE,
- YoutubeSearchDateIE,
- YoutubeSearchIE,
- YoutubeSearchURLIE,
- YoutubeShowIE,
- YoutubeSubscriptionsIE,
- YoutubeTruncatedIDIE,
- YoutubeTruncatedURLIE,
- YoutubeUserIE,
- YoutubeWatchLaterIE,
-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
deleted file mode 100644
index aa04905..0000000
--- a/youtube_dl/extractor/generic.py
+++ /dev/null
@@ -1,3335 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import os
-import re
-import sys
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
- compat_xml_parse_error,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- HEADRequest,
- is_html,
- js_to_json,
- KNOWN_EXTENSIONS,
- merge_dicts,
- mimetype2ext,
- orderedSet,
- sanitized_Request,
- smuggle_url,
- unescapeHTML,
- unified_strdate,
- unsmuggle_url,
- UnsupportedError,
- xpath_text,
-)
-from .commonprotocols import RtmpIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nbc import NBCSportsVPlayerIE
-from .ooyala import OoyalaIE
-from .rutv import RUTVIE
-from .tvc import TVCIE
-from .sportbox import SportBoxEmbedIE
-from .smotri import SmotriIE
-from .myvi import MyviIE
-from .condenast import CondeNastIE
-from .udn import UDNEmbedIE
-from .senateisvp import SenateISVPIE
-from .svt import SVTIE
-from .pornhub import PornHubIE
-from .xhamster import XHamsterEmbedIE
-from .tnaflix import TNAFlixNetworkEmbedIE
-from .drtuber import DrTuberIE
-from .redtube import RedTubeIE
-from .tube8 import Tube8IE
-from .vimeo import VimeoIE
-from .dailymotion import DailymotionIE
-from .dailymail import DailyMailIE
-from .onionstudios import OnionStudiosIE
-from .viewlift import ViewLiftEmbedIE
-from .mtv import MTVServicesEmbeddedIE
-from .pladform import PladformIE
-from .videomore import VideomoreIE
-from .webcaster import WebcasterFeedIE
-from .googledrive import GoogleDriveIE
-from .jwplatform import JWPlatformIE
-from .digiteka import DigitekaIE
-from .arkena import ArkenaIE
-from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
-from .threeqsdn import ThreeQSDNIE
-from .theplatform import ThePlatformIE
-from .vessel import VesselIE
-from .kaltura import KalturaIE
-from .eagleplatform import EaglePlatformIE
-from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
-from .tunein import TuneInBaseIE
-from .vbox7 import Vbox7IE
-from .dbtv import DBTVIE
-from .piksel import PikselIE
-from .videa import VideaIE
-from .twentymin import TwentyMinutenIE
-from .ustream import UstreamIE
-from .openload import OpenloadIE
-from .videopress import VideoPressIE
-from .rutube import RutubeIE
-from .limelight import LimelightBaseIE
-from .anvato import AnvatoIE
-from .washingtonpost import WashingtonPostIE
-from .wistia import WistiaIE
-from .mediaset import MediasetIE
-from .joj import JojIE
-from .megaphone import MegaphoneIE
-from .vzaar import VzaarIE
-from .channel9 import Channel9IE
-from .vshare import VShareIE
-from .mediasite import MediasiteIE
-from .springboardplatform import SpringboardPlatformIE
-from .yapfiles import YapFilesIE
-from .vice import ViceIE
-from .xfileshare import XFileShareIE
-from .cloudflarestream import CloudflareStreamIE
-from .peertube import PeerTubeIE
-from .indavideo import IndavideoEmbedIE
-from .apa import APAIE
-from .foxnews import FoxNewsIE
-
-
-class GenericIE(InfoExtractor):
- IE_DESC = 'Generic downloader that works on some sites'
- _VALID_URL = r'.*'
- IE_NAME = 'generic'
- _TESTS = [
- # Direct link to a video
- {
- 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
- 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
- 'info_dict': {
- 'id': 'trailer',
- 'ext': 'mp4',
- 'title': 'trailer',
- 'upload_date': '20100513',
- }
- },
- # Direct link to media delivered compressed (until Accept-Encoding is *)
- {
- 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
- 'md5': '128c42e68b13950268b648275386fc74',
- 'info_dict': {
- 'id': 'FictionJunction-Parallel_Hearts',
- 'ext': 'flac',
- 'title': 'FictionJunction-Parallel_Hearts',
- 'upload_date': '20140522',
- },
- 'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ],
- 'skip': 'URL invalid',
- },
- # Direct download with broken HEAD
- {
- 'url': 'http://ai-radio.org:8000/radio.opus',
- 'info_dict': {
- 'id': 'radio',
- 'ext': 'opus',
- 'title': 'radio',
- },
- 'params': {
- 'skip_download': True, # infinite live stream
- },
- 'expected_warnings': [
- r'501.*Not Implemented',
- r'400.*Bad Request',
- ],
- },
- # Direct link with incorrect MIME type
- {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'md5': '4ccbebe5f36706d85221f204d7eb5913',
- 'info_dict': {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'id': '5_Lennart_Poettering_-_Systemd',
- 'ext': 'webm',
- 'title': '5_Lennart_Poettering_-_Systemd',
- 'upload_date': '20141120',
- },
- 'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ]
- },
- # RSS feed
- {
- 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'info_dict': {
- 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'title': 'Zero Punctuation',
- 'description': 're:.*groundbreaking video review series.*'
- },
- 'playlist_mincount': 11,
- },
- # RSS feed with enclosure
- {
- 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
- 'info_dict': {
- 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- 'ext': 'm4v',
- 'upload_date': '20150228',
- 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- }
- },
- # RSS feed with enclosures and unsupported link URLs
- {
- 'url': 'http://www.hellointernet.fm/podcast?format=rss',
- 'info_dict': {
- 'id': 'http://www.hellointernet.fm/podcast?format=rss',
- 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
- 'title': 'Hello Internet',
- },
- 'playlist_mincount': 100,
- },
- # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
- {
- 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
- 'info_dict': {
- 'id': 'smil',
- 'ext': 'mp4',
- 'title': 'Automatics, robotics and biocybernetics',
- 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
- 'upload_date': '20130627',
- 'formats': 'mincount:16',
- 'subtitles': 'mincount:1',
- },
- 'params': {
- 'force_generic_extractor': True,
- 'skip_download': True,
- },
- },
- # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
- {
- 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
- 'info_dict': {
- 'id': 'hds',
- 'ext': 'flv',
- 'title': 'hds',
- 'formats': 'mincount:1',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from https://www.restudy.dk/video/play/id/1637
- {
- 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
- 'info_dict': {
- 'id': 'video_1637',
- 'ext': 'flv',
- 'title': 'video_1637',
- 'formats': 'mincount:3',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
- {
- 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
- 'info_dict': {
- 'id': 'smil-service',
- 'ext': 'flv',
- 'title': 'smil-service',
- 'formats': 'mincount:1',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
- {
- 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
- 'info_dict': {
- 'id': '4719370',
- 'ext': 'mp4',
- 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
- 'formats': 'mincount:3',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
- {
- 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
- 'info_dict': {
- 'id': 'mZlp2ctYIUEB',
- 'ext': 'mp4',
- 'title': 'Tikibad ontruimd wegens brand',
- 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 33,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # MPD from http://dash-mse-test.appspot.com/media.html
- {
- 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
- 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
- 'info_dict': {
- 'id': 'car-20120827-manifest',
- 'ext': 'mp4',
- 'title': 'car-20120827-manifest',
- 'formats': 'mincount:9',
- 'upload_date': '20130904',
- },
- 'params': {
- 'format': 'bestvideo',
- },
- },
- # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
- {
- 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
- 'info_dict': {
- 'id': 'content',
- 'ext': 'mp4',
- 'title': 'content',
- 'formats': 'mincount:8',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'skip': 'video gone',
- },
- # m3u8 served with Content-Type: text/plain
- {
- 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
- 'info_dict': {
- 'id': 'index',
- 'ext': 'mp4',
- 'title': 'index',
- 'upload_date': '20140720',
- 'formats': 'mincount:11',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'skip': 'video gone',
- },
- # google redirect
- {
- 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
- 'info_dict': {
- 'id': 'cmQHVoWB5FY',
- 'ext': 'mp4',
- 'upload_date': '20130224',
- 'uploader_id': 'TheVerge',
- 'description': r're:^Chris Ziegler takes a look at the\.*',
- 'uploader': 'The Verge',
- 'title': 'First Firefox OS phones side-by-side',
- },
- 'params': {
- 'skip_download': False,
- }
- },
- {
- # redirect in Refresh HTTP header
- 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
- 'info_dict': {
- 'id': 'pO8h3EaFRdo',
- 'ext': 'mp4',
- 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
- 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
- 'upload_date': '20150917',
- 'uploader_id': 'brtvofficial',
- 'uploader': 'Boiler Room',
- },
- 'params': {
- 'skip_download': False,
- },
- },
- {
- 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
- 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
- 'info_dict': {
- 'id': '13601338388002',
- 'ext': 'mp4',
- 'uploader': 'www.hodiho.fr',
- 'title': 'R\u00e9gis plante sa Jeep',
- }
- },
- # bandcamp page with custom domain
- {
- 'add_ie': ['Bandcamp'],
- 'url': 'http://bronyrock.com/track/the-pony-mash',
- 'info_dict': {
- 'id': '3235767654',
- 'ext': 'mp3',
- 'title': 'The Pony Mash',
- 'uploader': 'M_Pallante',
- },
- 'skip': 'There is a limit of 200 free downloads / month for the test song',
- },
- {
- # embedded brightcove video
- # it also tests brightcove videos that need to set the 'Referer'
- # in the http requests
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
- 'info_dict': {
- 'id': '2765128793001',
- 'ext': 'mp4',
- 'title': 'Le cours de bourse : l’analyse technique',
- 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
- 'uploader': 'BFM BUSINESS',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # embedded with itemprop embedURL and video id spelled as `idVideo`
- 'add_id': ['BrightcoveLegacy'],
- 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
- 'info_dict': {
- 'id': '5255628253001',
- 'ext': 'mp4',
- 'title': 'md5:37c519b1128915607601e75a87995fc0',
- 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
- 'uploader': 'BFM BUSINESS',
- 'uploader_id': '876450612001',
- 'timestamp': 1482255315,
- 'upload_date': '20161220',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # https://github.com/rg3/youtube-dl/issues/2253
- 'url': 'http://bcove.me/i6nfkrc3',
- 'md5': '0ba9446db037002366bab3b3eb30c88c',
- 'info_dict': {
- 'id': '3101154703001',
- 'ext': 'mp4',
- 'title': 'Still no power',
- 'uploader': 'thestar.com',
- 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
- },
- 'add_ie': ['BrightcoveLegacy'],
- 'skip': 'video gone',
- },
- {
- 'url': 'http://www.championat.com/video/football/v/87/87499.html',
- 'md5': 'fb973ecf6e4a78a67453647444222983',
- 'info_dict': {
- 'id': '3414141473001',
- 'ext': 'mp4',
- 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
- 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
- 'uploader': 'Championat',
- },
- },
- {
- # https://github.com/rg3/youtube-dl/issues/3541
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
- 'info_dict': {
- 'id': '3866516442001',
- 'ext': 'mp4',
- 'title': 'Leer mij vrouwen kennen: Aflevering 1',
- 'description': 'Leer mij vrouwen kennen: Aflevering 1',
- 'uploader': 'SBS Broadcasting',
- },
- 'skip': 'Restricted to Netherlands',
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- },
- {
- # Brightcove video in <iframe>
- 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
- 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
- 'info_dict': {
- 'id': '5360463607001',
- 'ext': 'mp4',
- 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
- 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
- 'uploader': 'United Nations',
- 'uploader_id': '1362235914001',
- 'timestamp': 1489593889,
- 'upload_date': '20170315',
- },
- 'add_ie': ['BrightcoveLegacy'],
- },
- {
- # Brightcove with alternative playerID key
- 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
- 'info_dict': {
- 'id': 'nmeth.2062_SV1',
- 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '2228375078001',
- 'ext': 'mp4',
- 'title': 'nmeth.2062-sv1',
- 'description': 'nmeth.2062-sv1',
- 'timestamp': 1363357591,
- 'upload_date': '20130315',
- 'uploader': 'Nature Publishing Group',
- 'uploader_id': '1964492299001',
- },
- }],
- },
- {
- # Brightcove with UUID in videoPlayer
- 'url': 'http://www8.hp.com/cn/zh/home.html',
- 'info_dict': {
- 'id': '5255815316001',
- 'ext': 'mp4',
- 'title': 'Sprocket Video - China',
- 'description': 'Sprocket Video - China',
- 'uploader': 'HP-Video Gallery',
- 'timestamp': 1482263210,
- 'upload_date': '20161220',
- 'uploader_id': '1107601872001',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- 'skip': 'video rotates...weekly?',
- },
- {
- # Brightcove:new type [2].
- 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
- 'md5': '2b35148fcf48da41c9fb4591650784f3',
- 'info_dict': {
- 'id': '5348741021001',
- 'ext': 'mp4',
- 'upload_date': '20170306',
- 'uploader_id': '4191638492001',
- 'timestamp': 1488769918,
- 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
-
- },
- },
- {
- # Alternative brightcove <video> attributes
- 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
- 'info_dict': {
- 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
- },
- 'playlist': [{
- 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
- 'info_dict': {
- 'id': '5311302538001',
- 'ext': 'mp4',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
- 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
- 'timestamp': 1486321708,
- 'upload_date': '20170205',
- 'uploader_id': '800000640001',
- },
- 'only_matching': True,
- }],
- },
- {
- # Brightcove with UUID in videoPlayer
- 'url': 'http://www8.hp.com/cn/zh/home.html',
- 'info_dict': {
- 'id': '5255815316001',
- 'ext': 'mp4',
- 'title': 'Sprocket Video - China',
- 'description': 'Sprocket Video - China',
- 'uploader': 'HP-Video Gallery',
- 'timestamp': 1482263210,
- 'upload_date': '20161220',
- 'uploader_id': '1107601872001',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- },
- # ooyala video
- {
- 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
- 'info_dict': {
- 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
- 'ext': 'mp4',
- 'title': '2cc213299525360.mov', # that's what we get
- 'duration': 238.231,
- },
- 'add_ie': ['Ooyala'],
- },
- {
- # ooyala video embedded with http://player.ooyala.com/iframe.js
- 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
- 'info_dict': {
- 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
- 'ext': 'mp4',
- 'title': '"Steve Jobs: Man in the Machine" trailer',
- 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
- 'duration': 135.427,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'movie expired',
- },
- # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
- {
- 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
- 'info_dict': {
- 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
- 'ext': 'mp4',
- 'title': 'Steampunk Fest Comes to Honesdale',
- 'duration': 43.276,
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # embed.ly video
- {
- 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
- 'info_dict': {
- 'id': '9ODmcdjQcHQ',
- 'ext': 'mp4',
- 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
- 'upload_date': '20140225',
- 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
- 'uploader': 'Tested',
- 'uploader_id': 'testedcom',
- },
- # No need to test YoutubeIE here
- 'params': {
- 'skip_download': True,
- },
- },
- # funnyordie embed
- {
- 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
- 'info_dict': {
- 'id': '18e820ec3f',
- 'ext': 'mp4',
- 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
- 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
- },
- # HEAD requests lead to endless 301, while GET is OK
- 'expected_warnings': ['301'],
- },
- # RUTV embed
- {
- 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
- 'info_dict': {
- 'id': '776940',
- 'ext': 'mp4',
- 'title': 'Охотское море стало целиком российским',
- 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # TVC embed
- {
- 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
- 'info_dict': {
- 'id': '55304',
- 'ext': 'mp4',
- 'title': 'Дошкольное воспитание',
- },
- },
- # SportBox embed
- {
- 'url': 'http://www.vestifinance.ru/articles/25753',
- 'info_dict': {
- 'id': '25753',
- 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '370908',
- 'title': 'Госзаказ. День 3',
- 'ext': 'mp4',
- }
- }, {
- 'info_dict': {
- 'id': '370905',
- 'title': 'Госзаказ. День 2',
- 'ext': 'mp4',
- }
- }, {
- 'info_dict': {
- 'id': '370902',
- 'title': 'Госзаказ. День 1',
- 'ext': 'mp4',
- }
- }],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # Myvi.ru embed
- {
- 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
- 'info_dict': {
- 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
- 'ext': 'mp4',
- 'title': 'Ужастики, русский трейлер (2015)',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 153,
- }
- },
- # XHamster embed
- {
- 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
- 'info_dict': {
- 'id': 'showthread',
- 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
- },
- 'playlist_mincount': 7,
- # This forum does not allow <iframe> syntaxes anymore
- # Now HTML tags are displayed as-is
- 'skip': 'No videos on this page',
- },
- # Embedded TED video
- {
- 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
- 'md5': '65fdff94098e4a607385a60c5177c638',
- 'info_dict': {
- 'id': '1969',
- 'ext': 'mp4',
- 'title': 'Hidden miracles of the natural world',
- 'uploader': 'Louie Schwartzberg',
- 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
- }
- },
- # nowvideo embed hidden behind percent encoding
- {
- 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
- 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
- 'info_dict': {
- 'id': '06e53103ca9aa',
- 'ext': 'flv',
- 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
- 'description': 'No description',
- },
- },
- # arte embed
- {
- 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
- 'md5': '7653032cbb25bf6c80d80f217055fa43',
- 'info_dict': {
- 'id': '048195-004_PLUS7-F',
- 'ext': 'flv',
- 'title': 'X:enius',
- 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
- 'upload_date': '20140320',
- },
- 'params': {
- 'skip_download': 'Requires rtmpdump'
- },
- 'skip': 'video gone',
- },
- # francetv embed
- {
- 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
- 'info_dict': {
- 'id': 'EV_30231',
- 'ext': 'mp4',
- 'title': 'Alcaline, le concert avec Calogero',
- 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
- 'upload_date': '20150226',
- 'timestamp': 1424989860,
- 'duration': 5400,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'expected_warnings': [
- 'Forbidden'
- ]
- },
- # Condé Nast embed
- {
- 'url': 'http://www.wired.com/2014/04/honda-asimo/',
- 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
- 'info_dict': {
- 'id': '53501be369702d3275860000',
- 'ext': 'mp4',
- 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
- }
- },
- # Dailymotion embed
- {
- 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
- 'md5': '441aeeb82eb72c422c7f14ec533999cd',
- 'info_dict': {
- 'id': 'k2mm4bCdJ6CQ2i7c8o2',
- 'ext': 'mp4',
- 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
- 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
- 'uploader': 'Spi0n',
- 'uploader_id': 'xgditw',
- 'upload_date': '20140425',
- 'timestamp': 1398441542,
- },
- 'add_ie': ['Dailymotion'],
- },
- # DailyMail embed
- {
- 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
- 'info_dict': {
- 'id': '1495629',
- 'ext': 'mp4',
- 'title': 'Care worker punches elderly dementia patient in head 11 times',
- 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
- },
- 'add_ie': ['DailyMail'],
- 'params': {
- 'skip_download': True,
- },
- },
- # YouTube embed
- {
- 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
- 'info_dict': {
- 'id': 'FXRb4ykk4S0',
- 'ext': 'mp4',
- 'title': 'The NBL Auction 2014',
- 'uploader': 'BADMINTON England',
- 'uploader_id': 'BADMINTONEvents',
- 'upload_date': '20140603',
- 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
- },
- 'add_ie': ['Youtube'],
- 'params': {
- 'skip_download': True,
- }
- },
- # MTVServices embed
- {
- 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
- 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
- 'info_dict': {
- 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
- 'ext': 'mp4',
- 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
- 'description': 'Two valets share their love for movie star Liam Neesons.',
- 'timestamp': 1349922600,
- 'upload_date': '20121011',
- },
- },
- # YouTube embed via <data-embed-url="">
- {
- 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
- 'info_dict': {
- 'id': '4vAffPZIT44',
- 'ext': 'mp4',
- 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
- 'uploader': 'Gameloft',
- 'uploader_id': 'gameloft',
- 'upload_date': '20140828',
- 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # YouTube <object> embed
- {
- 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
- 'md5': '516718101ec834f74318df76259fb3cc',
- 'info_dict': {
- 'id': 'msN87y-iEx0',
- 'ext': 'webm',
- 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
- 'upload_date': '20080526',
- 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
- 'uploader': 'Christopher Sykes',
- 'uploader_id': 'ChristopherJSykes',
- },
- 'add_ie': ['Youtube'],
- },
- # Camtasia studio
- {
- 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
- 'playlist': [{
- 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
- 'ext': 'flv',
- 'duration': 2235.90,
- }
- }, {
- 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
- 'ext': 'flv',
- 'duration': 2235.93,
- }
- }],
- 'info_dict': {
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- }
- },
- # Flowplayer
- {
- 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
- 'md5': '9d65602bf31c6e20014319c7d07fba27',
- 'info_dict': {
- 'id': '5123ea6d5e5a7',
- 'ext': 'mp4',
- 'age_limit': 18,
- 'uploader': 'www.handjobhub.com',
- 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
- }
- },
- # Multiple brightcove videos
- # https://github.com/rg3/youtube-dl/issues/2283
- {
- 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
- 'info_dict': {
- 'id': 'always-never',
- 'title': 'Always / Never - The New Yorker',
- },
- 'playlist_count': 3,
- 'params': {
- 'extract_flat': False,
- 'skip_download': True,
- }
- },
- # MLB embed
- {
- 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
- 'md5': '96f09a37e44da40dd083e12d9a683327',
- 'info_dict': {
- 'id': '33322633',
- 'ext': 'mp4',
- 'title': 'Ump changes call to ball',
- 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
- 'duration': 48,
- 'timestamp': 1401537900,
- 'upload_date': '20140531',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- # Wistia embed
- {
- 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
- 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
- 'info_dict': {
- 'id': '6e2wtrbdaf',
- 'ext': 'mov',
- 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
- 'description': 'a Paywall Videos video from Remilon',
- 'duration': 644.072,
- 'uploader': 'study.com',
- 'timestamp': 1459678540,
- 'upload_date': '20160403',
- 'filesize': 24687186,
- },
- },
- {
- 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
- 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
- 'info_dict': {
- 'id': 'uxjb0lwrcz',
- 'ext': 'mp4',
- 'title': 'Conversation about Hexagonal Rails Part 1',
- 'description': 'a Martin Fowler video from ThoughtWorks',
- 'duration': 1715.0,
- 'uploader': 'thoughtworks.wistia.com',
- 'timestamp': 1401832161,
- 'upload_date': '20140603',
- },
- },
- # Wistia standard embed (async)
- {
- 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
- 'info_dict': {
- 'id': '807fafadvk',
- 'ext': 'mp4',
- 'title': 'Drip Brennan Dunn Workshop',
- 'description': 'a JV Webinars video from getdrip-1',
- 'duration': 4986.95,
- 'timestamp': 1463607249,
- 'upload_date': '20160518',
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # Soundcloud embed
- {
- 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
- 'info_dict': {
- 'id': '174391317',
- 'ext': 'mp3',
- 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
- 'uploader': 'Sophos Security',
- 'title': 'Chet Chat 171 - Oct 29, 2014',
- 'upload_date': '20141029',
- }
- },
- # Soundcloud multiple embeds
- {
- 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
- 'info_dict': {
- 'id': '52809',
- 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
- },
- 'playlist_mincount': 7,
- },
- # TuneIn station embed
- {
- 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
- 'info_dict': {
- 'id': '204146',
- 'ext': 'mp3',
- 'title': 'CNRV',
- 'location': 'Paris, France',
- 'is_live': True,
- },
- 'params': {
- # Live stream
- 'skip_download': True,
- },
- },
- # Livestream embed
- {
- 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
- 'info_dict': {
- 'id': '67864563',
- 'ext': 'flv',
- 'upload_date': '20141112',
- 'title': 'Rosetta #CometLanding webcast HL 10',
- }
- },
- # Another Livestream embed, without 'new.' in URL
- {
- 'url': 'https://www.freespeech.org/',
- 'info_dict': {
- 'id': '123537347',
- 'ext': 'mp4',
- 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- },
- 'params': {
- # Live stream
- 'skip_download': True,
- },
- },
- # LazyYT
- {
- 'url': 'https://skiplagged.com/',
- 'info_dict': {
- 'id': 'skiplagged',
- 'title': 'Skiplagged: The smart way to find cheap flights',
- },
- 'playlist_mincount': 1,
- 'add_ie': ['Youtube'],
- },
- # Cinchcast embed
- {
- 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
- 'info_dict': {
- 'id': '7141703',
- 'ext': 'mp3',
- 'upload_date': '20141126',
- 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
- }
- },
- # Cinerama player
- {
- 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
- 'info_dict': {
- 'id': '730m_DandD_1901_512k',
- 'ext': 'mp4',
- 'uploader': 'www.abc.net.au',
- 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
- }
- },
- # embedded viddler video
- {
- 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
- 'info_dict': {
- 'id': '4d03aad9',
- 'ext': 'mp4',
- 'uploader': 'deadspin',
- 'title': 'WALL-TO-GORTAT',
- 'timestamp': 1422285291,
- 'upload_date': '20150126',
- },
- 'add_ie': ['Viddler'],
- },
- # Libsyn embed
- {
- 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
- 'info_dict': {
- 'id': '3377616',
- 'ext': 'mp3',
- 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
- 'description': 'md5:601cb790edd05908957dae8aaa866465',
- 'upload_date': '20150220',
- },
- 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
- },
- # jwplayer YouTube
- {
- 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
- 'info_dict': {
- 'id': 'Mrj4DVp2zeA',
- 'ext': 'mp4',
- 'upload_date': '20150212',
- 'uploader': 'The National Archives UK',
- 'description': 'md5:8078af856dca76edc42910b61273dbbf',
- 'uploader_id': 'NationalArchives08',
- 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
- },
- },
- # jwplayer rtmp
- {
- 'url': 'http://www.suffolk.edu/sjc/live.php',
- 'info_dict': {
- 'id': 'live',
- 'ext': 'flv',
- 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
- 'uploader': 'www.suffolk.edu',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
- },
- # Complex jwplayer
- {
- 'url': 'http://www.indiedb.com/games/king-machine/videos',
- 'info_dict': {
- 'id': 'videos',
- 'ext': 'mp4',
- 'title': 'king machine trailer 1',
- 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- {
- # JWPlayer config passed as variable
- 'url': 'http://www.txxx.com/videos/3326530/ariele/',
- 'info_dict': {
- 'id': '3326530_hq',
- 'ext': 'mp4',
- 'title': 'ARIELE | Tube Cup',
- 'uploader': 'www.txxx.com',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- }
- },
- {
- # JWPlatform iframe
- 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
- 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
- 'info_dict': {
- 'id': 'O0c5JcKT',
- 'ext': 'mp4',
- 'upload_date': '20171122',
- 'timestamp': 1511366290,
- 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
- },
- 'add_ie': [JWPlatformIE.ie_key()],
- },
- {
- # Video.js embed, multiple formats
- 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
- 'info_dict': {
- 'id': 'yygqldloqIk',
- 'ext': 'mp4',
- 'title': 'SolidWorks. Урок 6 Настройка чертежа',
- 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
- 'upload_date': '20130314',
- 'uploader': 'PROстое3D',
- 'uploader_id': 'PROstoe3D',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Video.js embed, single format
- 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
- 'info_dict': {
- 'id': 'watch',
- 'ext': 'mp4',
- 'title': 'Step 1 - Good Foundation',
- 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # rtl.nl embed
- {
- 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'aanslagen-kopenhagen',
- 'title': 'Aanslagen Kopenhagen',
- }
- },
- # Zapiks embed
- {
- 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
- 'info_dict': {
- 'id': '118046',
- 'ext': 'mp4',
- 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
- }
- },
- # Kaltura embed (different embed code)
- {
- 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
- 'info_dict': {
- 'id': '1_a52wc67y',
- 'ext': 'flv',
- 'upload_date': '20150127',
- 'uploader_id': 'PremierMedia',
- 'timestamp': int,
- 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
- },
- },
- # Kaltura embed with single quotes
- {
- 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
- 'info_dict': {
- 'id': '0_izeg5utt',
- 'ext': 'mp4',
- 'title': '35871',
- 'timestamp': 1355743100,
- 'upload_date': '20121217',
- 'uploader_id': 'cplapp@learn360.com',
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura embedded via quoted entry_id
- 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
- 'info_dict': {
- 'id': '0_utuok90b',
- 'ext': 'mp4',
- 'title': '06_matthew_brender_raj_dutt',
- 'timestamp': 1466638791,
- 'upload_date': '20160622',
- },
- 'add_ie': ['Kaltura'],
- 'expected_warnings': [
- 'Could not send HEAD request'
- ],
- 'params': {
- 'skip_download': True,
- }
- },
- {
- # Kaltura embedded, some fileExt broken (#11480)
- 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
- 'info_dict': {
- 'id': '1_sgtvehim',
- 'ext': 'mp4',
- 'title': 'Our "Standard Models" of particle physics and cosmology',
- 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
- 'timestamp': 1321158993,
- 'upload_date': '20111113',
- 'uploader_id': 'kps1',
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura iframe embed
- 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
- 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
- 'info_dict': {
- 'id': '0_f2cfbpwy',
- 'ext': 'mp4',
- 'title': 'I. M. Pei: A Centennial Celebration',
- 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
- 'upload_date': '20170403',
- 'uploader_id': 'batchUser',
- 'timestamp': 1491232186,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura iframe embed, more sophisticated
- 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
- 'info_dict': {
- 'id': '1_9gzouybz',
- 'ext': 'mp4',
- 'title': 'lecture-05sep2017',
- 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
- 'upload_date': '20170913',
- 'uploader_id': 'eps2',
- 'timestamp': 1505340777,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # meta twitter:player
- 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
- 'info_dict': {
- 'id': '0_01b42zps',
- 'ext': 'mp4',
- 'title': 'Main Twerk (Video)',
- 'upload_date': '20171208',
- 'uploader_id': 'sebastian.salinas@thechive.com',
- 'timestamp': 1512713057,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Kaltura'],
- },
- # referrer protected EaglePlatform embed
- {
- 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
- 'info_dict': {
- 'id': '582306',
- 'ext': 'mp4',
- 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3382,
- 'view_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # ClipYou (EaglePlatform) embed (custom URL)
- {
- 'url': 'http://muz-tv.ru/play/7129/',
- # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
- 'info_dict': {
- 'id': '12820',
- 'ext': 'mp4',
- 'title': "'O Sole Mio",
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 216,
- 'view_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable.',
- },
- # Pladform embed
- {
- 'url': 'http://muz-tv.ru/kinozal/view/7400/',
- 'info_dict': {
- 'id': '100183293',
- 'ext': 'mp4',
- 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
- 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 694,
- 'age_limit': 0,
- },
- 'skip': 'HTTP Error 404: Not Found',
- },
- # Playwire embed
- {
- 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
- 'info_dict': {
- 'id': '3519514',
- 'ext': 'mp4',
- 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
- 'thumbnail': r're:^https?://.*\.png$',
- 'duration': 45.115,
- },
- },
- # 5min embed
- {
- 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
- 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
- 'info_dict': {
- 'id': '518726732',
- 'ext': 'mp4',
- 'title': 'Facebook Creates "On This Day" | Crunch Report',
- 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
- 'timestamp': 1427237531,
- 'uploader': 'Crunch Report',
- 'upload_date': '20150324',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # Crooks and Liars embed
- {
- 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
- 'info_dict': {
- 'id': '8RUoRhRi',
- 'ext': 'mp4',
- 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
- 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
- 'timestamp': 1428207000,
- 'upload_date': '20150405',
- 'uploader': 'Heather',
- },
- },
- # Crooks and Liars external embed
- {
- 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
- 'info_dict': {
- 'id': 'MTE3MjUtMzQ2MzA',
- 'ext': 'mp4',
- 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
- 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
- 'timestamp': 1265032391,
- 'upload_date': '20100201',
- 'uploader': 'Heather',
- },
- },
- # NBC Sports vplayer embed
- {
- 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
- 'info_dict': {
- 'id': 'ln7x1qSThw4k',
- 'ext': 'flv',
- 'title': "PFT Live: New leader in the 'new-look' defense",
- 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
- 'uploader': 'NBCU-SPORTS',
- 'upload_date': '20140107',
- 'timestamp': 1389118457,
- },
- 'skip': 'Invalid Page URL',
- },
- # NBC News embed
- {
- 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
- 'md5': '1aa589c675898ae6d37a17913cf68d66',
- 'info_dict': {
- 'id': 'x_dtl_oa_LettermanliftPR_160608',
- 'ext': 'mp4',
- 'title': 'David Letterman: A Preview',
- 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
- 'upload_date': '20160609',
- 'timestamp': 1465431544,
- 'uploader': 'NBCU-NEWS',
- },
- },
- # UDN embed
- {
- 'url': 'https://video.udn.com/news/300346',
- 'md5': 'fd2060e988c326991037b9aff9df21a6',
- 'info_dict': {
- 'id': '300346',
- 'ext': 'mp4',
- 'title': '中一中男師變性 全校師生力挺',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to parse JSON Expecting value'],
- },
- # Brightcove URL in single quotes
- {
- 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
- 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
- 'info_dict': {
- 'id': '4255764656001',
- 'ext': 'mp4',
- 'title': 'SN Presents: Russell Martin, World Citizen',
- 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
- 'uploader': 'Rogers Sportsnet',
- 'uploader_id': '1704050871',
- 'upload_date': '20150525',
- 'timestamp': 1432570283,
- },
- },
- # OnionStudios embed
- {
- 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
- 'info_dict': {
- 'id': '2855',
- 'ext': 'mp4',
- 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
- }
- },
- # SnagFilms embed
- {
- 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
- 'info_dict': {
- 'id': '74849a00-85a9-11e1-9660-123139220831',
- 'ext': 'mp4',
- 'title': '#whilewewatch',
- }
- },
- # AdobeTVVideo embed
- {
- 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
- 'md5': '43662b577c018ad707a63766462b1e87',
- 'info_dict': {
- 'id': '2456',
- 'ext': 'mp4',
- 'title': 'New experience with Acrobat DC',
- 'description': 'New experience with Acrobat DC',
- 'duration': 248.667,
- },
- },
- # BrightcoveInPageEmbed embed
- {
- 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
- 'info_dict': {
- 'id': '4238694884001',
- 'ext': 'flv',
- 'title': 'Tabletop: Dread, Last Thoughts',
- 'description': 'Tabletop: Dread, Last Thoughts',
- 'duration': 51690,
- },
- },
- # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
- # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
- {
- 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
- 'info_dict': {
- 'id': '4785848093001',
- 'ext': 'mp4',
- 'title': 'The Cardinal Pell Interview',
- 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
- 'uploader': 'GlobeCast Australia - GlobeStream',
- 'uploader_id': '2733773828001',
- 'upload_date': '20160304',
- 'timestamp': 1457083087,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- },
- {
- # Brightcove embed with whitespace around attribute names
- 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
- 'info_dict': {
- 'id': '3167554373001',
- 'ext': 'mp4',
- 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
- 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
- 'uploader_id': '1079349493',
- 'upload_date': '20140207',
- 'timestamp': 1391810548,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # Another form of arte.tv embed
- {
- 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
- 'md5': '850bfe45417ddf221288c88a0cffe2e2',
- 'info_dict': {
- 'id': '030273-562_PLUS7-F',
- 'ext': 'mp4',
- 'title': 'ARTE Reportage - Nulle part, en France',
- 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
- 'upload_date': '20160409',
- },
- },
- # LiveLeak embed
- {
- 'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': '7619da8c820e835bef21a1efa2a0fc71',
- 'info_dict': {
- 'id': '874_1459135191',
- 'ext': 'mp4',
- 'title': 'Man shows poor quality of new apartment building',
- 'description': 'The wall is like a sand pile.',
- 'uploader': 'Lake8737',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Another LiveLeak embed pattern (#13336)
- {
- 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
- 'info_dict': {
- 'id': '2eb_1496309988',
- 'ext': 'mp4',
- 'title': 'Thief robs place where everyone was armed',
- 'description': 'md5:694d73ee79e535953cf2488562288eee',
- 'uploader': 'brazilwtf',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Duplicated embedded video URLs
- {
- 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
- 'info_dict': {
- 'id': '149298443_480_16c25b74_2',
- 'ext': 'mp4',
- 'title': 'vs. Blue Orange Spring Game',
- 'uploader': 'www.hudl.com',
- },
- },
- # twitter:player:stream embed
- {
- 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
- 'info_dict': {
- 'id': 'master',
- 'ext': 'mp4',
- 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
- 'uploader': 'www.rtl.be',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- },
- # twitter:player embed
- {
- 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
- 'md5': 'a3e0df96369831de324f0778e126653c',
- 'info_dict': {
- 'id': '4909620399001',
- 'ext': 'mp4',
- 'title': 'What Do Black Holes Sound Like?',
- 'description': 'what do black holes sound like',
- 'upload_date': '20160524',
- 'uploader_id': '29913724001',
- 'timestamp': 1464107587,
- 'uploader': 'TheAtlantic',
- },
- 'add_ie': ['BrightcoveLegacy'],
- },
- # Facebook <iframe> embed
- {
- 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
- 'md5': 'fbcde74f534176ecb015849146dd3aee',
- 'info_dict': {
- 'id': '599637780109885',
- 'ext': 'mp4',
- 'title': 'Facebook video #599637780109885',
- },
- },
- # Facebook <iframe> embed, plugin video
- {
- 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
- 'info_dict': {
- 'id': '1754168231264132',
- 'ext': 'mp4',
- 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
- 'uploader': 'Tariq Ramadan (official)',
- 'timestamp': 1496758379,
- 'upload_date': '20170606',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # Facebook API embed
- {
- 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
- 'md5': 'a47372ee61b39a7b90287094d447d94e',
- 'info_dict': {
- 'id': '10153467542406923',
- 'ext': 'mp4',
- 'title': 'Facebook video #10153467542406923',
- },
- },
- # Wordpress "YouTube Video Importer" plugin
- {
- 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
- 'md5': 'd16797741b560b485194eddda8121b48',
- 'info_dict': {
- 'id': 'HNTXWDXV9Is',
- 'ext': 'mp4',
- 'title': 'Blue Devils Drumline Stanford lot 2016',
- 'upload_date': '20160627',
- 'uploader_id': 'GENOCIDE8GENERAL10',
- 'uploader': 'cylus cyrus',
- },
- },
- {
- # video stored on custom kaltura server
- 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
- 'md5': '537617d06e64dfed891fa1593c4b30cc',
- 'info_dict': {
- 'id': '0_1iotm5bh',
- 'ext': 'mp4',
- 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
- 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
- 'uploader_id': 'videos.expansion@el-mundo.net',
- 'upload_date': '20150429',
- 'timestamp': 1430303472,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Non-standard Vimeo embed
- 'url': 'https://openclassrooms.com/courses/understanding-the-web',
- 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
- 'info_dict': {
- 'id': '148867247',
- 'ext': 'mp4',
- 'title': 'Understanding the web - Teaser',
- 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
- 'upload_date': '20151214',
- 'uploader': 'OpenClassrooms',
- 'uploader_id': 'openclassrooms',
- },
- 'add_ie': ['Vimeo'],
- },
- {
- # generic vimeo embed that requires original URL passed as Referer
- 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
- 'only_matching': True,
- },
- {
- 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
- 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
- 'info_dict': {
- 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny',
- 'description': 'Royalty free test video',
- 'timestamp': 1432816365,
- 'upload_date': '20150528',
- 'is_live': False,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [ArkenaIE.ie_key()],
- },
- {
- 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
- 'info_dict': {
- 'id': '1c7141f46c',
- 'ext': 'mp4',
- 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [Vbox7IE.ie_key()],
- },
- {
- # DBTV embeds
- 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
- 'info_dict': {
- 'id': '43254897',
- 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
- },
- 'playlist_mincount': 3,
- },
- {
- # Videa embeds
- 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
- 'info_dict': {
- 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
- 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
- },
- 'playlist_mincount': 2,
- },
- {
- # 20 minuten embed
- 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
- 'info_dict': {
- 'id': '523629',
- 'ext': 'mp4',
- 'title': 'So kommen Sie bei Eis und Schnee sicher an',
- 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TwentyMinutenIE.ie_key()],
- },
- {
- # VideoPress embed
- 'url': 'https://en.support.wordpress.com/videopress/',
- 'info_dict': {
- 'id': 'OcobLTqC',
- 'ext': 'm4v',
- 'title': 'IMG_5786',
- 'timestamp': 1435711927,
- 'upload_date': '20150701',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [VideoPressIE.ie_key()],
- },
- {
- # Rutube embed
- 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
- 'info_dict': {
- 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
- 'ext': 'flv',
- 'title': 'Магаззино: Казань 2',
- 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
- 'uploader': 'Магаззино',
- 'upload_date': '20170228',
- 'uploader_id': '996642',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [RutubeIE.ie_key()],
- },
- {
- # ThePlatform embedded with whitespaces in URLs
- 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
- 'only_matching': True,
- },
- {
- # Senate ISVP iframe https
- 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
- 'md5': 'fb8c70b0b515e5037981a2492099aab8',
- 'info_dict': {
- 'id': 'govtaff020316',
- 'ext': 'mp4',
- 'title': 'Integrated Senate Video Player',
- },
- 'add_ie': [SenateISVPIE.ie_key()],
- },
- {
- # Limelight embeds (1 channel embed + 4 media embeds)
- 'url': 'http://www.sedona.com/FacilitatorTraining2017',
- 'info_dict': {
- 'id': 'FacilitatorTraining2017',
- 'title': 'Facilitator Training 2017',
- },
- 'playlist_mincount': 5,
- },
- {
- # Limelight embed (LimelightPlayerUtil.embed)
- 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
- 'info_dict': {
- 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
- 'ext': 'mp4',
- 'title': '07448641',
- 'timestamp': 1499890639,
- 'upload_date': '20170712',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['LimelightMedia'],
- },
- {
- 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
- 'info_dict': {
- 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
- 'title': 'Standoff with Walnut Creek murder suspect ends',
- 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
- },
- 'playlist_mincount': 4,
- },
- {
- # WashingtonPost embed
- 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
- 'info_dict': {
- 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
- 'ext': 'mp4',
- 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
- 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
- 'timestamp': 1455216756,
- 'uploader': 'The Washington Post',
- 'upload_date': '20160211',
- },
- 'add_ie': [WashingtonPostIE.ie_key()],
- },
- {
- # Mediaset embed
- 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
- 'info_dict': {
- 'id': '720642',
- 'ext': 'mp4',
- 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [MediasetIE.ie_key()],
- },
- {
- # JOJ.sk embeds
- 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
- 'info_dict': {
- 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
- 'title': 'Slovenskom sa prehnala vlna silných búrok',
- },
- 'playlist_mincount': 5,
- 'add_ie': [JojIE.ie_key()],
- },
- {
- # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
- 'url': 'https://tvrain.ru/amp/418921/',
- 'md5': 'cc00413936695987e8de148b67d14f1d',
- 'info_dict': {
- 'id': '418921',
- 'ext': 'mp4',
- 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
- },
- },
- {
- # vzaar embed
- 'url': 'http://help.vzaar.com/article/165-embedding-video',
- 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
- 'info_dict': {
- 'id': '8707641',
- 'ext': 'mp4',
- 'title': 'Building A Business Online: Principal Chairs Q & A',
- },
- },
- {
- # multiple HTML5 videos on one page
- 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
- 'info_dict': {
- 'id': 'keyscenarios',
- 'title': 'Rescue Kit 14 Free Edition - Getting started',
- },
- 'playlist_count': 4,
- },
- {
- # vshare embed
- 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
- 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
- 'info_dict': {
- 'id': '0f64ce6',
- 'title': 'vl14062007715967',
- 'ext': 'mp4',
- }
- },
- {
- 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
- 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
- 'info_dict': {
- 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
- 'ext': 'mp4',
- 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
- 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
- 'timestamp': 1474354800,
- 'upload_date': '20160920',
- }
- },
- {
- 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
- 'info_dict': {
- 'id': '1731611',
- 'ext': 'mp4',
- 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
- 'description': 'md5:eb5f23826a027ba95277d105f248b825',
- 'timestamp': 1516100691,
- 'upload_date': '20180116',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [SpringboardPlatformIE.ie_key()],
- },
- {
- 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
- 'info_dict': {
- 'id': 'uPDB5I9wfp8',
- 'ext': 'webm',
- 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
- 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
- 'upload_date': '20160219',
- 'uploader': 'Pocoyo - Português (BR)',
- 'uploader_id': 'PocoyoBrazil',
- },
- 'add_ie': [YoutubeIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
- 'info_dict': {
- 'id': 'vMDE4NzI1Mjgt690b',
- 'ext': 'mp4',
- 'title': 'Котята',
- },
- 'add_ie': [YapFilesIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # CloudflareStream embed
- 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
- 'info_dict': {
- 'id': '31c9291ab41fac05471db4e73aa11717',
- 'ext': 'mp4',
- 'title': '31c9291ab41fac05471db4e73aa11717',
- },
- 'add_ie': [CloudflareStreamIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # PeerTube embed
- 'url': 'https://joinpeertube.org/fr/home/',
- 'info_dict': {
- 'id': 'home',
- 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
- },
- 'playlist_count': 2,
- },
- {
- # Indavideo embed
- 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
- 'info_dict': {
- 'id': '1693903',
- 'ext': 'mp4',
- 'title': 'Így kell otthon hamburgert sütni',
- 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
- 'timestamp': 1426330212,
- 'upload_date': '20150314',
- 'uploader': 'StreetKitchen',
- 'uploader_id': '546363',
- },
- 'add_ie': [IndavideoEmbedIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # APA embed via JWPlatform embed
- 'url': 'http://www.vol.at/blue-man-group/5593454',
- 'info_dict': {
- 'id': 'jjv85FdZ',
- 'ext': 'mp4',
- 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 254,
- 'timestamp': 1519211149,
- 'upload_date': '20180221',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
- 'md5': 'b68d276de422ab07ee1d49388103f457',
- 'info_dict': {
- 'id': '83645793',
- 'title': 'Lock up and get excited',
- 'ext': 'mp4'
- },
- 'skip': 'TODO: fix nested playlists processing in tests',
- },
- # {
- # # TODO: find another test
- # # http://schema.org/VideoObject
- # 'url': 'https://flipagram.com/f/nyvTSJMKId',
- # 'md5': '888dcf08b7ea671381f00fab74692755',
- # 'info_dict': {
- # 'id': 'nyvTSJMKId',
- # 'ext': 'mp4',
- # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
- # 'description': '#love for cats.',
- # 'timestamp': 1461244995,
- # 'upload_date': '20160421',
- # },
- # 'params': {
- # 'force_generic_extractor': True,
- # },
- # }
- ]
-
def report_following_redirect(self, new_url):
    """Print a '[redirect]' notice naming the URL that is being followed.

    new_url is interpolated into the message with %-formatting and the
    resulting line is handed to the downloader's to_screen().
    """
    message = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(message)
-
- def _extract_rss(self, url, video_id, doc):
- playlist_title = doc.find('./channel/title').text
- playlist_desc_el = doc.find('./channel/description')
- playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-
- entries = []
- for it in doc.findall('./channel/item'):
- next_url = None
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
-
- if not next_url:
- next_url = xpath_text(it, 'link', fatal=False)
-
- if not next_url:
- continue
-
- entries.append({
- '_type': 'url_transparent',
- 'url': next_url,
- 'title': it.find('title').text,
- })
-
- return {
- '_type': 'playlist',
- 'id': url,
- 'title': playlist_title,
- 'description': playlist_desc,
- 'entries': entries,
- }
-
- def _extract_camtasia(self, url, video_id, webpage):
- """ Returns None if no camtasia video can be found. """
-
- camtasia_cfg = self._search_regex(
- r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
- webpage, 'camtasia configuration file', default=None)
- if camtasia_cfg is None:
- return None
-
- title = self._html_search_meta('DC.title', webpage, fatal=True)
-
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
- camtasia_cfg = self._download_xml(
- camtasia_url, video_id,
- note='Downloading camtasia configuration',
- errnote='Failed to download camtasia configuration')
- fileset_node = camtasia_cfg.find('./playlist/array/fileset')
-
- entries = []
- for n in fileset_node.getchildren():
- url_n = n.find('./uri')
- if url_n is None:
- continue
-
- entries.append({
- 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
- 'title': '%s - %s' % (title, n.tag),
- 'url': compat_urlparse.urljoin(url, url_n.text),
- 'duration': float_or_none(n.find('./duration').text),
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'title': title,
- }
-
- def _real_extract(self, url):
- if url.startswith('//'):
- return {
- '_type': 'url',
- 'url': self.http_scheme() + url,
- }
-
- parsed_url = compat_urlparse.urlparse(url)
- if not parsed_url.scheme:
- default_search = self._downloader.params.get('default_search')
- if default_search is None:
- default_search = 'fixup_error'
-
- if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if '/' in url:
- self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
- return self.url_result('http://' + url)
- elif default_search != 'fixup_error':
- if default_search == 'auto_warning':
- if re.match(r'^(?:url|URL)$', url):
- raise ExtractorError(
- 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
- expected=True)
- else:
- self._downloader.report_warning(
- 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
- return self.url_result('ytsearch:' + url)
-
- if default_search in ('error', 'fixup_error'):
- raise ExtractorError(
- '%r is not a valid URL. '
- 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
- % (url, url), expected=True)
- else:
- if ':' not in default_search:
- default_search += ':'
- return self.url_result(default_search + url)
-
- url, smuggled_data = unsmuggle_url(url)
- force_videoid = None
- is_intentional = smuggled_data and smuggled_data.get('to_generic')
- if smuggled_data and 'force_videoid' in smuggled_data:
- force_videoid = smuggled_data['force_videoid']
- video_id = force_videoid
- else:
- video_id = self._generic_id(url)
-
- self.to_screen('%s: Requesting header' % video_id)
-
- head_req = HEADRequest(url)
- head_response = self._request_webpage(
- head_req, video_id,
- note=False, errnote='Could not send HEAD request to %s' % url,
- fatal=False)
-
- if head_response is not False:
- # Check for redirect
- new_url = compat_str(head_response.geturl())
- if url != new_url:
- self.report_following_redirect(new_url)
- if force_videoid:
- new_url = smuggle_url(
- new_url, {'force_videoid': force_videoid})
- return self.url_result(new_url)
-
- full_response = None
- if head_response is False:
- request = sanitized_Request(url)
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
- head_response = full_response
-
- info_dict = {
- 'id': video_id,
- 'title': self._generic_title(url),
- 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
- }
-
- # Check for direct link to a video
- content_type = head_response.headers.get('Content-Type', '').lower()
- m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
- if m:
- format_id = compat_str(m.group('format_id'))
- if format_id.endswith('mpegurl'):
- formats = self._extract_m3u8_formats(url, video_id, 'mp4')
- elif format_id == 'f4m':
- formats = self._extract_f4m_formats(url, video_id)
- else:
- formats = [{
- 'format_id': format_id,
- 'url': url,
- 'vcodec': 'none' if m.group('type') == 'audio' else None
- }]
- info_dict['direct'] = True
- self._sort_formats(formats)
- info_dict['formats'] = formats
- return info_dict
-
- if not self._downloader.params.get('test', False) and not is_intentional:
- force = self._downloader.params.get('force_generic_extractor', False)
- self._downloader.report_warning(
- '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
-
- if not full_response:
- request = sanitized_Request(url)
- # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
- # making it impossible to download only a chunk of the file (yet we need only 512kB to
- # test whether it's HTML or not). With youtube-dl's default Accept-Encoding
- # this would always result in downloading the whole file, which is not desirable.
- # Therefore, for the extraction pass we have to override Accept-Encoding to accept any
- # encoding, so that we receive raw bytes and are able to download only a chunk.
- # It would probably be better to solve this by checking Content-Type for application/octet-stream
- # after the HEAD request finishes, but it is not clear whether we can rely on this.
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
-
- first_bytes = full_response.read(512)
-
- # Is it an M3U playlist?
- if first_bytes.startswith(b'#EXTM3U'):
- info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
- self._sort_formats(info_dict['formats'])
- return info_dict
-
- # Maybe it's a direct link to a video?
- # Be careful not to download the whole thing!
- if not is_html(first_bytes):
- self._downloader.report_warning(
- 'URL could be a direct video link, returning it as such.')
- info_dict.update({
- 'direct': True,
- 'url': url,
- })
- return info_dict
-
- webpage = self._webpage_read_content(
- full_response, url, video_id, prefix=first_bytes)
-
- self.report_extraction(video_id)
-
- # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
- try:
- doc = compat_etree_fromstring(webpage.encode('utf-8'))
- if doc.tag == 'rss':
- return self._extract_rss(url, video_id, doc)
- elif doc.tag == 'SmoothStreamingMedia':
- info_dict['formats'] = self._parse_ism_formats(doc, url)
- self._sort_formats(info_dict['formats'])
- return info_dict
- elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
- smil = self._parse_smil(doc, url, video_id)
- self._sort_formats(smil['formats'])
- return smil
- elif doc.tag == '{http://xspf.org/ns/0/}playlist':
- return self.playlist_result(
- self._parse_xspf(
- doc, video_id, xspf_url=url,
- xspf_base_url=compat_str(full_response.geturl())),
- video_id)
- elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
- info_dict['formats'] = self._parse_mpd_formats(
- doc,
- mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
- mpd_url=url)
- self._sort_formats(info_dict['formats'])
- return info_dict
- elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
- info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
- self._sort_formats(info_dict['formats'])
- return info_dict
- except compat_xml_parse_error:
- pass
-
- # Is it a Camtasia project?
- camtasia_res = self._extract_camtasia(url, video_id, webpage)
- if camtasia_res is not None:
- return camtasia_res
-
- # Sometimes embedded video player is hidden behind percent encoding
- # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
- # Unescaping the whole page allows to handle those cases in a generic way
- webpage = compat_urllib_parse_unquote(webpage)
-
- # it's tempting to parse this further, but you would
- # have to take into account all the variations like
- # Video Title - Site Name
- # Site Name | Video Title
- # Video Title - Tagline | Site Name
- # and so on and so forth; it's just not practical
- video_title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'(?s)<title>(.*?)</title>', webpage, 'video title',
- default='video')
-
- # Try to detect age limit automatically
- age_limit = self._rta_search(webpage)
- # And then there are the jokers who advertise that they use RTA,
- # but actually don't.
- AGE_LIMIT_MARKERS = [
- r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
- ]
- if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
- age_limit = 18
-
- # video uploader is domain name
- video_uploader = self._search_regex(
- r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
-
- video_description = self._og_search_description(webpage, default=None)
- video_thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- info_dict.update({
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'age_limit': age_limit,
- })
-
- # Look for Brightcove Legacy Studio embeds
- bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
- if bc_urls:
- entries = [{
- '_type': 'url',
- 'url': smuggle_url(bc_url, {'Referer': url}),
- 'ie_key': 'BrightcoveLegacy'
- } for bc_url in bc_urls]
-
- return {
- '_type': 'playlist',
- 'title': video_title,
- 'id': video_id,
- 'entries': entries,
- }
-
- # Look for Brightcove New Studio embeds
- bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
- if bc_urls:
- return self.playlist_from_matches(
- bc_urls, video_id, video_title,
- getter=lambda x: smuggle_url(x, {'referrer': url}),
- ie='BrightcoveNew')
-
- # Look for Nexx embeds
- nexx_urls = NexxIE._extract_urls(webpage)
- if nexx_urls:
- return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
-
- # Look for Nexx iFrame embeds
- nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
- if nexx_embed_urls:
- return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
-
- # Look for ThePlatform embeds
- tp_urls = ThePlatformIE._extract_urls(webpage)
- if tp_urls:
- return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
-
- # Look for Vessel embeds
- vessel_urls = VesselIE._extract_urls(webpage)
- if vessel_urls:
- return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
-
- # Look for embedded rtl.nl player
- matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
- webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
-
- vimeo_urls = VimeoIE._extract_urls(url, webpage)
- if vimeo_urls:
- return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
-
- vid_me_embed_url = self._search_regex(
- r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
- webpage, 'vid.me embed', default=None)
- if vid_me_embed_url is not None:
- return self.url_result(vid_me_embed_url, 'Vidme')
-
- # Look for YouTube embeds
- youtube_urls = YoutubeIE._extract_urls(webpage)
- if youtube_urls:
- return self.playlist_from_matches(
- youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
-
- matches = DailymotionIE._extract_urls(webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title)
-
- # Look for embedded Dailymotion playlist player (#3822)
- m = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
- if m:
- playlists = re.findall(
- r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
- if playlists:
- return self.playlist_from_matches(
- playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
-
- # Look for DailyMail embeds
- dailymail_urls = DailyMailIE._extract_urls(webpage)
- if dailymail_urls:
- return self.playlist_from_matches(
- dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
-
- # Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
-
- # Look for SVT player
- svt_url = SVTIE._extract_url(webpage)
- if svt_url:
- return self.url_result(svt_url, 'SVT')
-
- # Look for Bandcamp pages with custom domain
- mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
- if mobj is not None:
- burl = unescapeHTML(mobj.group(1))
- # Don't set the extractor because it can be a track url or an album
- return self.url_result(burl)
-
- # Look for embedded Vevo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Viddler player
- mobj = re.search(
- r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NYTimes player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Libsyn player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
- re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
- if mobj is not None:
- embed_token = self._search_regex(
- r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
- webpage, 'ooyala embed token', default=None)
- return OoyalaIE._build_url_result(smuggle_url(
- mobj.group('ec'), {
- 'domain': url,
- 'embed_token': embed_token,
- }))
-
- # Look for multiple Ooyala embeds on SBN network websites
- mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
- if mobj is not None:
- embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
- if embeds:
- return self.playlist_from_matches(
- embeds, video_id, video_title,
- getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
-
- # Look for Aparat videos
- mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Aparat')
-
- # Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Mpora')
-
- # Look for embedded NovaMov-based player
- mobj = re.search(
- r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
- (?P<url>http://(?:(?:embed|www)\.)?
- (?:novamov\.com|
- nowvideo\.(?:ch|sx|eu|at|ag|co)|
- videoweed\.(?:es|com)|
- movshare\.(?:net|sx|ag)|
- divxstage\.(?:eu|net|ch|co|at|ag))
- /embed\.php.+?)\1''', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Facebook player
- facebook_urls = FacebookIE._extract_urls(webpage)
- if facebook_urls:
- return self.playlist_from_matches(facebook_urls, video_id, video_title)
-
- # Look for embedded VK player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'VK')
-
- # Look for embedded Odnoklassniki player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Odnoklassniki')
-
- # Look for embedded ivi player
- mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ivi')
-
- # Look for embedded Huffington Post player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'HuffPost')
-
- # Look for embed.ly
- mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
- mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
- if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
-
- # Look for funnyordie embed
- matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
- if matches:
- return self.playlist_from_matches(
- matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
-
- # Look for BBC iPlayer embed
- matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
-
- # Look for embedded RUTV player
- rutv_url = RUTVIE._extract_url(webpage)
- if rutv_url:
- return self.url_result(rutv_url, 'RUTV')
-
- # Look for embedded TVC player
- tvc_url = TVCIE._extract_url(webpage)
- if tvc_url:
- return self.url_result(tvc_url, 'TVC')
-
- # Look for embedded SportBox player
- sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
- if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
-
- # Look for embedded XHamster player
- xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
- if xhamster_urls:
- return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
-
- # Look for embedded TNAFlixNetwork player
- tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
- if tnaflix_urls:
- return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
-
- # Look for embedded PornHub player
- pornhub_urls = PornHubIE._extract_urls(webpage)
- if pornhub_urls:
- return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
-
- # Look for embedded DrTuber player
- drtuber_urls = DrTuberIE._extract_urls(webpage)
- if drtuber_urls:
- return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
-
- # Look for embedded RedTube player
- redtube_urls = RedTubeIE._extract_urls(webpage)
- if redtube_urls:
- return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
-
- # Look for embedded Tube8 player
- tube8_urls = Tube8IE._extract_urls(webpage)
- if tube8_urls:
- return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
-
- # Look for embedded Tvigle player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Tvigle')
-
- # Look for embedded TED player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'TED')
-
- # Look for embedded Ustream videos
- ustream_url = UstreamIE._extract_url(webpage)
- if ustream_url:
- return self.url_result(ustream_url, UstreamIE.ie_key())
-
- # Look for embedded arte.tv player
- mobj = re.search(
- r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'ArteTVEmbed')
-
- # Look for embedded francetv player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded smotri.com player
- smotri_url = SmotriIE._extract_url(webpage)
- if smotri_url:
- return self.url_result(smotri_url, 'Smotri')
-
- # Look for embedded Myvi.ru player
- myvi_url = MyviIE._extract_url(webpage)
- if myvi_url:
- return self.url_result(myvi_url)
-
- # Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
- if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
-
- # Look for tunein player
- tunein_urls = TuneInBaseIE._extract_urls(webpage)
- if tunein_urls:
- return self.playlist_from_matches(tunein_urls, video_id, video_title)
-
- # Look for embedded mtvservices player
- mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
- if mtvservices_url:
- return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
-
- # Look for embedded yahoo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Yahoo')
-
- # Look for embedded sbs.com.au player
- mobj = re.search(
- r'''(?x)
- (?:
- <meta\s+property="og:video"\s+content=|
- <iframe[^>]+?src=
- )
- (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'SBS')
-
- # Look for embedded Cinchcast player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Cinchcast')
-
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'MLB')
-
- mobj = re.search(
- r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
- webpage)
- if mobj is not None:
- return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Livestream')
-
- # Look for Zapiks embed
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Zapiks')
-
- # Look for Kaltura embeds
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
-
- # Look for EaglePlatform embeds
- eagleplatform_url = EaglePlatformIE._extract_url(webpage)
- if eagleplatform_url:
- return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
-
- # Look for ClipYou (uses EaglePlatform) embeds
- mobj = re.search(
- r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
- if mobj is not None:
- return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-
- # Look for Pladform embeds
- pladform_url = PladformIE._extract_url(webpage)
- if pladform_url:
- return self.url_result(pladform_url)
-
- # Look for Videomore embeds
- videomore_url = VideomoreIE._extract_url(webpage)
- if videomore_url:
- return self.url_result(videomore_url)
-
- # Look for Webcaster embeds
- webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
- if webcaster_url:
- return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
-
- # Look for Playwire embeds
- mobj = re.search(
- r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for 5min embeds
- mobj = re.search(
- r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
- if mobj is not None:
- return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
-
- # Look for Crooks and Liars embeds
- mobj = re.search(
- r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NBC Sports VPlayer embeds
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-
- # Look for NBC News embeds
- nbc_news_embed_url = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
- if nbc_news_embed_url:
- return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
-
- # Look for Google Drive embeds
- google_drive_url = GoogleDriveIE._extract_url(webpage)
- if google_drive_url:
- return self.url_result(google_drive_url, 'GoogleDrive')
-
- # Look for UDN embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
- if mobj is not None:
- return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-
- # Look for Senate ISVP iframe
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
- if senate_isvp_url:
- return self.url_result(senate_isvp_url, 'SenateISVP')
-
- # Look for OnionStudios embeds
- onionstudios_url = OnionStudiosIE._extract_url(webpage)
- if onionstudios_url:
- return self.url_result(onionstudios_url)
-
- # Look for ViewLift embeds
- viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
- if viewlift_url:
- return self.url_result(viewlift_url)
-
- # Look for JWPlatform embeds
- jwplatform_urls = JWPlatformIE._extract_urls(webpage)
- if jwplatform_urls:
- return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
-
- # Look for Digiteka embeds
- digiteka_url = DigitekaIE._extract_url(webpage)
- if digiteka_url:
- return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
-
- # Look for Arkena embeds
- arkena_url = ArkenaIE._extract_url(webpage)
- if arkena_url:
- return self.url_result(arkena_url, ArkenaIE.ie_key())
-
- # Look for Piksel embeds
- piksel_url = PikselIE._extract_url(webpage)
- if piksel_url:
- return self.url_result(piksel_url, PikselIE.ie_key())
-
- # Look for Limelight embeds
- limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
- if limelight_urls:
- return self.playlist_result(
- limelight_urls, video_id, video_title, video_description)
-
- # Look for Anvato embeds
- anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
- if anvato_urls:
- return self.playlist_result(
- anvato_urls, video_id, video_title, video_description)
-
- # Look for AdobeTVVideo embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))),
- 'AdobeTVVideo')
-
- # Look for Vine embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
-
- # Look for VODPlatform embeds
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
-
- # Look for Mangomolo embeds
- mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
- (?:
- video\?.*?\bid=(?P<video_id>\d+)|
- index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
- ).+?)\1''', webpage)
- if mobj is not None:
- info = {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
- video_id = mobj.group('video_id')
- if video_id:
- info.update({
- 'ie_key': 'MangomoloVideo',
- 'id': video_id,
- })
- else:
- info.update({
- 'ie_key': 'MangomoloLive',
- 'id': mobj.group('channel_id'),
- })
- return info
-
- # Look for Instagram embeds
- instagram_embed_url = InstagramIE._extract_embed_url(webpage)
- if instagram_embed_url is not None:
- return self.url_result(
- self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
-
- # Look for LiveLeak embeds
- liveleak_urls = LiveLeakIE._extract_urls(webpage)
- if liveleak_urls:
- return self.playlist_from_matches(liveleak_urls, video_id, video_title)
-
- # Look for 3Q SDN embeds
- threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
- if threeqsdn_url:
- return {
- '_type': 'url_transparent',
- 'ie_key': ThreeQSDNIE.ie_key(),
- 'url': self._proto_relative_url(threeqsdn_url),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
-
- # Look for VBOX7 embeds
- vbox7_url = Vbox7IE._extract_url(webpage)
- if vbox7_url:
- return self.url_result(vbox7_url, Vbox7IE.ie_key())
-
- # Look for DBTV embeds
- dbtv_urls = DBTVIE._extract_urls(webpage)
- if dbtv_urls:
- return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
-
- # Look for Videa embeds
- videa_urls = VideaIE._extract_urls(webpage)
- if videa_urls:
- return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
-
- # Look for 20 minuten embeds
- twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
- if twentymin_urls:
- return self.playlist_from_matches(
- twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
-
- # Look for Openload embeds
- openload_urls = OpenloadIE._extract_urls(webpage)
- if openload_urls:
- return self.playlist_from_matches(
- openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
- # Look for VideoPress embeds
- videopress_urls = VideoPressIE._extract_urls(webpage)
- if videopress_urls:
- return self.playlist_from_matches(
- videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
-
- # Look for Rutube embeds
- rutube_urls = RutubeIE._extract_urls(webpage)
- if rutube_urls:
- return self.playlist_from_matches(
- rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
-
- # Look for WashingtonPost embeds
- wapo_urls = WashingtonPostIE._extract_urls(webpage)
- if wapo_urls:
- return self.playlist_from_matches(
- wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
-
- # Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(webpage)
- if mediaset_urls:
- return self.playlist_from_matches(
- mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
-
- # Look for JOJ.sk embeds
- joj_urls = JojIE._extract_urls(webpage)
- if joj_urls:
- return self.playlist_from_matches(
- joj_urls, video_id, video_title, ie=JojIE.ie_key())
-
- # Look for megaphone.fm embeds
- mpfn_urls = MegaphoneIE._extract_urls(webpage)
- if mpfn_urls:
- return self.playlist_from_matches(
- mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
-
- # Look for vzaar embeds
- vzaar_urls = VzaarIE._extract_urls(webpage)
- if vzaar_urls:
- return self.playlist_from_matches(
- vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
-
- channel9_urls = Channel9IE._extract_urls(webpage)
- if channel9_urls:
- return self.playlist_from_matches(
- channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
-
- vshare_urls = VShareIE._extract_urls(webpage)
- if vshare_urls:
- return self.playlist_from_matches(
- vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
-
- # Look for Mediasite embeds
- mediasite_urls = MediasiteIE._extract_urls(webpage)
- if mediasite_urls:
- entries = [
- self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
- {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
- for mediasite_url in mediasite_urls]
- return self.playlist_result(entries, video_id, video_title)
-
- springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
- if springboardplatform_urls:
- return self.playlist_from_matches(
- springboardplatform_urls, video_id, video_title,
- ie=SpringboardPlatformIE.ie_key())
-
- yapfiles_urls = YapFilesIE._extract_urls(webpage)
- if yapfiles_urls:
- return self.playlist_from_matches(
- yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
-
- vice_urls = ViceIE._extract_urls(webpage)
- if vice_urls:
- return self.playlist_from_matches(
- vice_urls, video_id, video_title, ie=ViceIE.ie_key())
-
- xfileshare_urls = XFileShareIE._extract_urls(webpage)
- if xfileshare_urls:
- return self.playlist_from_matches(
- xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
-
- cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
- if cloudflarestream_urls:
- return self.playlist_from_matches(
- cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
-
- peertube_urls = PeerTubeIE._extract_urls(webpage, url)
- if peertube_urls:
- return self.playlist_from_matches(
- peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
-
- indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
- if indavideo_urls:
- return self.playlist_from_matches(
- indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
-
- apa_urls = APAIE._extract_urls(webpage)
- if apa_urls:
- return self.playlist_from_matches(
- apa_urls, video_id, video_title, ie=APAIE.ie_key())
-
- foxnews_urls = FoxNewsIE._extract_urls(webpage)
- if foxnews_urls:
- return self.playlist_from_matches(
- foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
-
- sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
- webpage)]
- if sharevideos_urls:
- return self.playlist_from_matches(
- sharevideos_urls, video_id, video_title)
-
- # Look for HTML5 media
- entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
- if entries:
- if len(entries) == 1:
- entries[0].update({
- 'id': video_id,
- 'title': video_title,
- })
- else:
- for num, entry in enumerate(entries, start=1):
- entry.update({
- 'id': '%s-%s' % (video_id, num),
- 'title': '%s (%d)' % (video_title, num),
- })
- for entry in entries:
- self._sort_formats(entry['formats'])
- return self.playlist_result(entries, video_id, video_title)
-
- jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
- if jwplayer_data:
- info = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, base_url=url)
- return merge_dicts(info, info_dict)
-
- # Video.js embed
- mobj = re.search(
- r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
- webpage)
- if mobj is not None:
- sources = self._parse_json(
- mobj.group(1), video_id, transform_source=js_to_json,
- fatal=False) or []
- if not isinstance(sources, list):
- sources = [sources]
- formats = []
- for source in sources:
- src = source.get('src')
- if not src or not isinstance(src, compat_str):
- continue
- src = compat_urlparse.urljoin(url, src)
- src_type = source.get('type')
- if isinstance(src_type, compat_str):
- src_type = src_type.lower()
- ext = determine_ext(src).lower()
- if src_type == 'video/youtube':
- return self.url_result(src, YoutubeIE.ie_key())
- if src_type == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
- elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': src,
- 'ext': (mimetype2ext(src_type) or
- ext if ext in KNOWN_EXTENSIONS else 'mp4'),
- })
- if formats:
- self._sort_formats(formats)
- info_dict['formats'] = formats
- return info_dict
-
- # Looking for http://schema.org/VideoObject
- json_ld = self._search_json_ld(
- webpage, video_id, default={}, expected_type='VideoObject')
- if json_ld.get('url'):
- return merge_dicts(json_ld, info_dict)
-
- def check_video(vurl):
- if YoutubeIE.suitable(vurl):
- return True
- if RtmpIE.suitable(vurl):
- return True
- vpath = compat_urlparse.urlparse(vurl).path
- vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
-
- def filter_video(urls):
- return list(filter(check_video, urls))
-
- # Start with something easy: JW Player in SWFObject
- found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
- if not found:
- # Look for gorilla-vid style embedding
- found = filter_video(re.findall(r'''(?sx)
- (?:
- jw_plugins|
- JWPlayerOptions|
- jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
- )
- .*?
- ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
- if not found:
- # Broaden the search a little bit
- found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
- if not found:
- # Broaden the findall a little bit: JWPlayer JS loader
- found = filter_video(re.findall(
- r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
- if not found:
- # Flow player
- found = filter_video(re.findall(r'''(?xs)
- flowplayer\("[^"]+",\s*
- \{[^}]+?\}\s*,
- \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
- ["']?url["']?\s*:\s*["']([^"']+)["']
- ''', webpage))
- if not found:
- # Cinerama player
- found = re.findall(
- r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
- if not found:
- # Try to find twitter cards info
- # twitter:player:stream should be checked before twitter:player since
- # it is expected to contain a raw stream (see
- # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
- found = filter_video(re.findall(
- r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
- if not found:
- # We look for Open Graph info:
- # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
- m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
- # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
- if m_video_type is not None:
- found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
- if not found:
- REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
- found = re.search(
- r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
- webpage)
- if not found:
- # Look also in Refresh HTTP header
- refresh_header = head_response.headers.get('Refresh')
- if refresh_header:
- # In python 2 response HTTP headers are bytestrings
- if sys.version_info < (3, 0) and isinstance(refresh_header, str):
- refresh_header = refresh_header.decode('iso-8859-1')
- found = re.search(REDIRECT_REGEX, refresh_header)
- if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
- if new_url != url:
- self.report_following_redirect(new_url)
- return {
- '_type': 'url',
- 'url': new_url,
- }
- else:
- found = None
-
- if not found:
- # twitter:player is a https URL to iframe player that may or may not
- # be supported by youtube-dl thus this is checked the very last (see
- # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
- embed_url = self._html_search_meta('twitter:player', webpage, default=None)
- if embed_url and embed_url != url:
- return self.url_result(embed_url)
-
- if not found:
- raise UnsupportedError(url)
-
- entries = []
- for video_url in orderedSet(found):
- video_url = unescapeHTML(video_url)
- video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
-
- # Sometimes, jwplayer extraction will result in a YouTube URL
- if YoutubeIE.suitable(video_url):
- entries.append(self.url_result(video_url, 'Youtube'))
- continue
-
- # here's a fun little line of code for you:
- video_id = os.path.splitext(video_id)[0]
-
- entry_info_dict = {
- 'id': video_id,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
- }
-
- if RtmpIE.suitable(video_url):
- entry_info_dict.update({
- '_type': 'url_transparent',
- 'ie_key': RtmpIE.ie_key(),
- 'url': video_url,
- })
- entries.append(entry_info_dict)
- continue
-
- ext = determine_ext(video_url)
- if ext == 'smil':
- entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
- elif ext == 'xspf':
- return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
- elif ext == 'm3u8':
- entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
- elif ext == 'mpd':
- entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
- elif ext == 'f4m':
- entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
- elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
- # Just matching .ism/manifest is not enough to be reliably sure
- # whether it's actually an ISM manifest or some other streaming
- # manifest since there are various streaming URL formats
- # possible (see [1]) as well as some other shenanigans like
- # .smil/manifest URLs that actually serve an ISM (see [2]) and
- # so on.
- # Thus the most reasonable way to solve this is to delegate
- # to generic extractor in order to look into the contents of
- # the manifest itself.
- # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
- # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
- entry_info_dict = self.url_result(
- smuggle_url(video_url, {'to_generic': True}),
- GenericIE.ie_key())
- else:
- entry_info_dict['url'] = video_url
-
- if entry_info_dict.get('formats'):
- self._sort_formats(entry_info_dict['formats'])
-
- entries.append(entry_info_dict)
-
- if len(entries) == 1:
- return entries[0]
- else:
- for num, e in enumerate(entries, start=1):
- # 'url' results don't have a title
- if e.get('title') is not None:
- e['title'] = '%s (%d)' % (e['title'], num)
- return {
- '_type': 'playlist',
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
deleted file mode 100644
index d264fe2..0000000
--- a/youtube_dl/extractor/openload.py
+++ /dev/null
@@ -1,379 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import os
-import re
-import subprocess
-import tempfile
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_kwargs,
-)
-from ..utils import (
- check_executable,
- determine_ext,
- encodeArgument,
- ExtractorError,
- get_element_by_id,
- get_exe_version,
- is_outdated_version,
- std_headers,
-)
-
-
-def cookie_to_dict(cookie):
- cookie_dict = {
- 'name': cookie.name,
- 'value': cookie.value,
- }
- if cookie.port_specified:
- cookie_dict['port'] = cookie.port
- if cookie.domain_specified:
- cookie_dict['domain'] = cookie.domain
- if cookie.path_specified:
- cookie_dict['path'] = cookie.path
- if cookie.expires is not None:
- cookie_dict['expires'] = cookie.expires
- if cookie.secure is not None:
- cookie_dict['secure'] = cookie.secure
- if cookie.discard is not None:
- cookie_dict['discard'] = cookie.discard
- try:
- if (cookie.has_nonstandard_attr('httpOnly') or
- cookie.has_nonstandard_attr('httponly') or
- cookie.has_nonstandard_attr('HttpOnly')):
- cookie_dict['httponly'] = True
- except TypeError:
- pass
- return cookie_dict
-
-
-def cookie_jar_to_list(cookie_jar):
- return [cookie_to_dict(cookie) for cookie in cookie_jar]
-
-
-class PhantomJSwrapper(object):
- """PhantomJS wrapper class
-
- This class is experimental.
- """
-
- _TEMPLATE = r'''
- phantom.onError = function(msg, trace) {{
- var msgStack = ['PHANTOM ERROR: ' + msg];
- if(trace && trace.length) {{
- msgStack.push('TRACE:');
- trace.forEach(function(t) {{
- msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
- + (t.function ? ' (in function ' + t.function +')' : ''));
- }});
- }}
- console.error(msgStack.join('\n'));
- phantom.exit(1);
- }};
- var page = require('webpage').create();
- var fs = require('fs');
- var read = {{ mode: 'r', charset: 'utf-8' }};
- var write = {{ mode: 'w', charset: 'utf-8' }};
- JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
- phantom.addCookie(x);
- }});
- page.settings.resourceTimeout = {timeout};
- page.settings.userAgent = "{ua}";
- page.onLoadStarted = function() {{
- page.evaluate(function() {{
- delete window._phantom;
- delete window.callPhantom;
- }});
- }};
- var saveAndExit = function() {{
- fs.write("{html}", page.content, write);
- fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
- phantom.exit();
- }};
- page.onLoadFinished = function(status) {{
- if(page.url === "") {{
- page.setContent(fs.read("{html}", read), "{url}");
- }}
- else {{
- {jscode}
- }}
- }};
- page.open("");
- '''
-
- _TMP_FILE_NAMES = ['script', 'html', 'cookies']
-
- @staticmethod
- def _version():
- return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
-
- def __init__(self, extractor, required_version=None, timeout=10000):
- self._TMP_FILES = {}
-
- self.exe = check_executable('phantomjs', ['-v'])
- if not self.exe:
- raise ExtractorError('PhantomJS executable not found in PATH, '
- 'download it from http://phantomjs.org',
- expected=True)
-
- self.extractor = extractor
-
- if required_version:
- version = self._version()
- if is_outdated_version(version, required_version):
- self.extractor._downloader.report_warning(
- 'Your copy of PhantomJS is outdated, update it to version '
- '%s or newer if you encounter any errors.' % required_version)
-
- self.options = {
- 'timeout': timeout,
- }
- for name in self._TMP_FILE_NAMES:
- tmp = tempfile.NamedTemporaryFile(delete=False)
- tmp.close()
- self._TMP_FILES[name] = tmp
-
- def __del__(self):
- for name in self._TMP_FILE_NAMES:
- try:
- os.remove(self._TMP_FILES[name].name)
- except (IOError, OSError, KeyError):
- pass
-
- def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
- for cookie in cookies:
- if 'path' not in cookie:
- cookie['path'] = '/'
- if 'domain' not in cookie:
- cookie['domain'] = compat_urlparse.urlparse(url).netloc
- with open(self._TMP_FILES['cookies'].name, 'wb') as f:
- f.write(json.dumps(cookies).encode('utf-8'))
-
- def _load_cookies(self):
- with open(self._TMP_FILES['cookies'].name, 'rb') as f:
- cookies = json.loads(f.read().decode('utf-8'))
- for cookie in cookies:
- if cookie['httponly'] is True:
- cookie['rest'] = {'httpOnly': None}
- if 'expiry' in cookie:
- cookie['expire_time'] = cookie['expiry']
- self.extractor._set_cookie(**compat_kwargs(cookie))
-
- def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
- """
- Downloads webpage (if needed) and executes JS
-
- Params:
- url: website url
- html: optional, html code of website
- video_id: video id
- note: optional, displayed when downloading webpage
- note2: optional, displayed when executing JS
- headers: custom http headers
- jscode: code to be executed when page is loaded
-
- Returns tuple with:
- * downloaded website (after JS execution)
- * anything you print with `console.log` (but not inside `page.execute`!)
-
- In most cases you don't need to add any `jscode`.
- It is executed in `page.onLoadFinished`.
- `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
- It is possible to wait for some element on the webpage, for example:
- var check = function() {
- var elementFound = page.evaluate(function() {
- return document.querySelector('#b.done') !== null;
- });
- if(elementFound)
- saveAndExit();
- else
- window.setTimeout(check, 500);
- }
-
- page.evaluate(function(){
- document.querySelector('#a').click();
- });
- check();
- """
- if 'saveAndExit();' not in jscode:
- raise ExtractorError('`saveAndExit();` not found in `jscode`')
- if not html:
- html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
- with open(self._TMP_FILES['html'].name, 'wb') as f:
- f.write(html.encode('utf-8'))
-
- self._save_cookies(url)
-
- replaces = self.options
- replaces['url'] = url
- user_agent = headers.get('User-Agent') or std_headers['User-Agent']
- replaces['ua'] = user_agent.replace('"', '\\"')
- replaces['jscode'] = jscode
-
- for x in self._TMP_FILE_NAMES:
- replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
-
- with open(self._TMP_FILES['script'].name, 'wb') as f:
- f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
-
- if video_id is None:
- self.extractor.to_screen('%s' % (note2,))
- else:
- self.extractor.to_screen('%s: %s' % (video_id, note2))
-
- p = subprocess.Popen([
- self.exe, '--ssl-protocol=any',
- self._TMP_FILES['script'].name
- ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate()
- if p.returncode != 0:
- raise ExtractorError(
- 'Executing JS failed\n:' + encodeArgument(err))
- with open(self._TMP_FILES['html'].name, 'rb') as f:
- html = f.read().decode('utf-8')
-
- self._load_cookies()
-
- return (html, encodeArgument(out))
-
-
-class OpenloadIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
-
- _TESTS = [{
- 'url': 'https://openload.co/f/kUEfGclsU9o',
- 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
- 'info_dict': {
- 'id': 'kUEfGclsU9o',
- 'ext': 'mp4',
- 'title': 'skyrim_no-audio_1080.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }, {
- 'url': 'https://openload.co/embed/rjC09fkPLYs',
- 'info_dict': {
- 'id': 'rjC09fkPLYs',
- 'ext': 'mp4',
- 'title': 'movie.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'subtitles': {
- 'en': [{
- 'ext': 'vtt',
- }],
- },
- },
- 'params': {
- 'skip_download': True, # test subtitles only
- },
- }, {
- 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
- 'only_matching': True,
- }, {
- 'url': 'https://openload.io/f/ZAn6oz-VZGE/',
- 'only_matching': True,
- }, {
- 'url': 'https://openload.co/f/_-ztPaZtMhM/',
- 'only_matching': True,
- }, {
- # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
- # for title and ext
- 'url': 'https://openload.co/embed/Sxz5sADo82g/',
- 'only_matching': True,
- }, {
- # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
- # via https://openload.co/f/e-Ixz9ZR5L0/
- 'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.openload.link/f/KnG-kKZdcfY',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.stream/f/KnG-kKZdcfY',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.win/f/kUEfGclsU9o',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.download/f/kUEfGclsU9o',
- 'only_matching': True,
- }, {
- # Its title has not got its extension but url has it
- 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
- 'only_matching': True,
- }]
-
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url_pattern = 'https://openload.co/%%s/%s/' % video_id
- headers = {
- 'User-Agent': self._USER_AGENT,
- }
-
- for path in ('embed', 'f'):
- page_url = url_pattern % path
- last = path == 'f'
- webpage = self._download_webpage(
- page_url, video_id, 'Downloading %s webpage' % path,
- headers=headers, fatal=last)
- if not webpage:
- continue
- if 'File not found' in webpage or 'deleted by the owner' in webpage:
- if not last:
- continue
- raise ExtractorError('File not found', expected=True, video_id=video_id)
- break
-
- phantom = PhantomJSwrapper(self, required_version='2.0')
- webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
-
- decoded_id = (get_element_by_id('streamurl', webpage) or
- get_element_by_id('streamuri', webpage) or
- get_element_by_id('streamurj', webpage) or
- self._search_regex(
- (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
- r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
- r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
- 'stream URL'))
-
- video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
-
- title = self._og_search_title(webpage, default=None) or self._search_regex(
- r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
- 'title', default=None) or self._html_search_meta(
- 'description', webpage, 'title', fatal=True)
-
- entries = self._parse_html5_media_entries(page_url, webpage, video_id)
- entry = entries[0] if entries else {}
- subtitles = entry.get('subtitles')
-
- info_dict = {
- 'id': video_id,
- 'title': title,
- 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
- 'url': video_url,
- 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
- 'subtitles': subtitles,
- 'http_headers': headers,
- }
- return info_dict
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
deleted file mode 100644
index df8ec25..0000000
--- a/youtube_dl/extractor/youtube.py
+++ /dev/null
@@ -1,3394 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
-import itertools
-import json
-import os.path
-import random
-import re
-import time
-import traceback
-import html
-
-from .common import InfoExtractor, SearchInfoExtractor
-from ..jsinterp import JSInterpreter
-from ..swfinterp import SWFInterpreter
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_kwargs,
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
- compat_str,
-)
-from ..utils import (
- bool_or_none,
- clean_html,
- dict_get,
- error_to_compat_str,
- extract_attributes,
- ExtractorError,
- float_or_none,
- get_element_by_attribute,
- get_element_by_id,
- int_or_none,
- mimetype2ext,
- orderedSet,
- parse_codecs,
- parse_duration,
- remove_quotes,
- remove_start,
- smuggle_url,
- str_or_none,
- str_to_int,
- try_get,
- unescapeHTML,
- unified_strdate,
- unsmuggle_url,
- uppercase_escape,
- url_or_none,
- urlencode_postdata,
-)
class YoutubeError(Exception):
    """Exception type specific to the YouTube extractors.

    Adds no behaviour beyond the built-in ``Exception``.
    """
-
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ``ids``."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Return an explicit False, as documented, rather than None.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST ``f_req`` (JSON-encoded) together with the login form fields.

            Returns the parsed JSON response, or False when the non-fatal
            download fails (callers below test ``is False``).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the payload parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Return an explicit False, as documented, rather than None.
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the conditional picks the *argument* of the
            # format string; unparenthesised, '%' binds tighter than
            # 'if/else' and the prefix is lost for every other message.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password message above.
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query."""
        # Copy so the caller's dict is never mutated; tolerate query=None.
        query = dict(kwargs.get('query') or {})
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The login result is deliberately not acted upon here: _login()
        # reports its own warnings and a failed login is not fatal.
        self._login()
-
-
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for listings that are paginated through a "Load more" button."""

    def _entries(self, page, playlist_id):
        """Yield the entries of ``page`` and of every follow-up "Load more" page.

        Entries come from ``self._process_page``. Iteration stops when no
        load-more link is found or a follow-up page has blank content.
        """
        widget_html = current_html = page
        for page_num in itertools.count(1):
            for item in self._process_page(current_html):
                yield item

            more_match = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if more_match is None:
                return

            max_retries = 3
            attempt = 0
            while True:
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://youtube.com/%s' % more_match.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as err:
                    cause = err.cause
                    if not (isinstance(cause, compat_HTTPError) and cause.code in (500, 503)):
                        raise
                    attempt += 1
                    if attempt > max_retries:
                        raise
                    continue
                break

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
-
-
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for playlist-like pages whose videos are located with ``self._VIDEO_RE``."""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every (id, title) pair found in ``content``."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by ``video_re`` in ``page``.

        ``ids_in_page`` and ``titles_in_page`` are parallel lists updated in
        place: unseen ids are appended, and an id that is already present
        only has its title filled in when it was empty so far.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in named and match.group('id') == '0':
                continue
            video_id = match.group('id')
            video_title = None
            if 'title' in named:
                video_title = unescapeHTML(match.group('title'))
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None
            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
                continue
            position = ids_in_page.index(video_id)
            if video_title and not titles_in_page[position]:
                titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return the zipped (video_id, title) pairs found in ``page``."""
        found_ids = []
        found_titles = []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
-
-
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base for pages that list several playlists."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist linked in ``content``."""
        playlist_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(playlist_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
-
-
-class YoutubeIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com'
- _VALID_URL = r"""(?x)^
- (
- (?:https?://|//) # http(s):// or protocol-independent URL
- (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
- (?:www\.)?deturl\.com/www\.youtube\.com/|
- (?:www\.)?pwnyoutube\.com/|
- (?:www\.)?hooktube\.com/|
- (?:www\.)?yourepeat\.com/|
- tube\.majestyc\.net/|
- # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
- (?:(?:www|dev)\.)?invidio\.us/|
- (?:(?:www|no)\.)?invidiou\.sh/|
- (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
- (?:www\.)?invidious\.kabi\.tk/|
- (?:www\.)?invidious\.enkirton\.net/|
- (?:www\.)?invidious\.13ad\.de/|
- (?:www\.)?invidious\.mastodon\.host/|
- (?:www\.)?invidious\.nixnet\.xyz/|
- (?:www\.)?tube\.poal\.co/|
- (?:www\.)?vid\.wxzm\.sx/|
- (?:www\.)?yt\.elukerio\.org/|
- (?:www\.)?kgg2m7yk5aybusll\.onion/|
- (?:www\.)?qklhadlycap4cnod\.onion/|
- (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
- (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
- (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
- (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
- youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
- (?:.*?\#/)? # handle anchor (#/) redirect urls
- (?: # the various things that can precede the ID:
- (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
- |(?: # or the v= param in all its forms
- (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
- (?:\?|\#!?) # the params delimiter ? or # or #!
- (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
- v=
- )
- ))
- |(?:
- youtu\.be| # just youtu.be/xxxx
- vid\.plus| # or vid.plus/xxxx
- zwearz\.com/watch| # or zwearz.com/watch/xxxx
- )/
- |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
- )
- )? # all until now is optional -> you can pass the naked ID
- ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
- (?!.*?\blist=
- (?:
- %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
- WL # WL are handled by the watch later IE
- )
- )
- (?(1).+)? # if we found the ID, everything can follow
- $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
- _formats = {
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-
- # av01 video only formats sometimes served with "unknown" codecs
- '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- }
- _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
-
- _GEO_BYPASS = False
-
- IE_NAME = 'youtube'
- _TESTS = [
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
- 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'start_time': 1,
- 'end_time': 9,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
- 'note': 'Test generic use_cipher_signature video (#897)',
- 'info_dict': {
- 'id': 'UxxajLWwzqY',
- 'ext': 'mp4',
- 'upload_date': '20120506',
- 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
- 'alt_title': 'I Love It (feat. Charli XCX)',
- 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
- 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
- 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
- 'iconic ep', 'iconic', 'love', 'it'],
- 'duration': 180,
- 'uploader': 'Icona Pop',
- 'uploader_id': 'IconaPop',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
- 'creator': 'Icona Pop',
- 'track': 'I Love It (feat. Charli XCX)',
- 'artist': 'Icona Pop',
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
- 'note': 'Test VEVO video with age protection (#956)',
- 'info_dict': {
- 'id': '07FYdnEawAQ',
- 'ext': 'mp4',
- 'upload_date': '20130703',
- 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
- 'alt_title': 'Tunnel Vision',
- 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
- 'duration': 419,
- 'uploader': 'justintimberlakeVEVO',
- 'uploader_id': 'justintimberlakeVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
- 'creator': 'Justin Timberlake',
- 'track': 'Tunnel Vision',
- 'artist': 'Justin Timberlake',
- 'age_limit': 18,
- }
- },
- {
- 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
- 'note': 'Embed-only video (#1746)',
- 'info_dict': {
- 'id': 'yZIXLfi8CZQ',
- 'ext': 'mp4',
- 'upload_date': '20120608',
- 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
- 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
- 'uploader': 'SET India',
- 'uploader_id': 'setindia',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
- 'age_limit': 18,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
- 'note': 'Use the first video ID in the URL',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
- 'note': '256k DASH audio (format 141) via DASH manifest',
- 'info_dict': {
- 'id': 'a9LDPn-MO4I',
- 'ext': 'm4a',
- 'upload_date': '20121002',
- 'uploader_id': '8KVIDEO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
- 'description': '',
- 'uploader': '8KVIDEO',
- 'title': 'UHDTV TEST 8K VIDEO.mp4'
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141',
- },
- 'skip': 'format 141 not served anymore',
- },
- # DASH manifest with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
- 'info_dict': {
- 'id': 'IB3lcPjvWLA',
- 'ext': 'm4a',
- 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
- 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
- 'duration': 244,
- 'uploader': 'AfrojackVEVO',
- 'uploader_id': 'AfrojackVEVO',
- 'upload_date': '20131011',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # JS player signature function name containing $
- {
- 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
- 'info_dict': {
- 'id': 'nfWlot6h_JM',
- 'ext': 'm4a',
- 'title': 'Taylor Swift - Shake It Off',
- 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
- 'duration': 242,
- 'uploader': 'TaylorSwiftVEVO',
- 'uploader_id': 'TaylorSwiftVEVO',
- 'upload_date': '20140818',
- 'creator': 'Taylor Swift',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # Controversy video
- {
- 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
- 'info_dict': {
- 'id': 'T4XJQO3qol8',
- 'ext': 'mp4',
- 'duration': 219,
- 'upload_date': '20100909',
- 'uploader': 'Amazing Atheist',
- 'uploader_id': 'TheAmazingAtheist',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
- 'title': 'Burning Everyone\'s Koran',
- 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
- }
- },
- # Normal age-gate video (No vevo, embed allowed)
- {
- 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
- 'info_dict': {
- 'id': 'HtVdAasjOgU',
- 'ext': 'mp4',
- 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
- 'duration': 142,
- 'uploader': 'The Witcher',
- 'uploader_id': 'WitcherGame',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
- 'upload_date': '20140605',
- 'age_limit': 18,
- },
- },
- # Age-gate video with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
- 'info_dict': {
- 'id': '6kLq3WMV1nU',
- 'ext': 'mp4',
- 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
- 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
- 'duration': 246,
- 'uploader': 'LloydVEVO',
- 'uploader_id': 'LloydVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
- 'upload_date': '20110629',
- 'age_limit': 18,
- },
- },
- # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
- # YouTube Red ad is not captured for creator
- {
- 'url': '__2ABJjxzNo',
- 'info_dict': {
- 'id': '__2ABJjxzNo',
- 'ext': 'mp4',
- 'duration': 266,
- 'upload_date': '20100430',
- 'uploader_id': 'deadmau5',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
- 'creator': 'deadmau5',
- 'description': 'md5:12c56784b8032162bb936a5f76d55360',
- 'uploader': 'deadmau5',
- 'title': 'Deadmau5 - Some Chords (HD)',
- 'alt_title': 'Some Chords',
- },
- 'expected_warnings': [
- 'DASH manifest missing',
- ]
- },
- # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
- {
- 'url': 'lqQg6PlCWgI',
- 'info_dict': {
- 'id': 'lqQg6PlCWgI',
- 'ext': 'mp4',
- 'duration': 6085,
- 'upload_date': '20150827',
- 'uploader_id': 'olympic',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
- 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- }
- },
- # Non-square pixels
- {
- 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
- 'info_dict': {
- 'id': '_b-2C3KPAM0',
- 'ext': 'mp4',
- 'stretched_ratio': 16 / 9.,
- 'duration': 85,
- 'upload_date': '20110310',
- 'uploader_id': 'AllenMeow',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
- 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
- 'uploader': '孫ᄋᄅ',
- 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
- },
- },
- # url_encoded_fmt_stream_map is empty string
- {
- 'url': 'qEJwOuvDf7I',
- 'info_dict': {
- 'id': 'qEJwOuvDf7I',
- 'ext': 'webm',
- 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
- 'description': '',
- 'upload_date': '20150404',
- 'uploader_id': 'spbelect',
- 'uploader': 'Наблюдатели Петербурга',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- },
- 'skip': 'This live event has ended.',
- },
- # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
- {
- 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
- 'info_dict': {
- 'id': 'FIl7x6_3R5Y',
- 'ext': 'webm',
- 'title': 'md5:7b81415841e02ecd4313668cde88737a',
- 'description': 'md5:116377fd2963b81ec4ce64b542173306',
- 'duration': 220,
- 'upload_date': '20150625',
- 'uploader_id': 'dorappi2000',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
- 'uploader': 'dorappi2000',
- 'formats': 'mincount:31',
- },
- 'skip': 'not actual anymore',
- },
- # DASH manifest with segment_list
- {
- 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
- 'md5': '8ce563a1d667b599d21064e982ab9e31',
- 'info_dict': {
- 'id': 'CsmdDsKjzN8',
- 'ext': 'mp4',
- 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
- 'uploader': 'Airtek',
- 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
- 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
- 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '135', # bestvideo
- },
- 'skip': 'This live event has ended.',
- },
- {
- # Multifeed videos (multiple cameras), URL is for Main Camera
- 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'title': 'teamPGP: Rocket League Noob Stream',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7335,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': '6h8e8xoXJzg',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'PUOgX5z9xZw',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'teuwxikvS5k',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (zim)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7334,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }],
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
- 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
- 'info_dict': {
- 'id': 'gVfLd0zydlo',
- 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
- },
- 'playlist_count': 2,
- 'skip': 'Not multifeed anymore',
- },
- {
- 'url': 'https://vid.plus/FlRa-iH7PGw',
- 'only_matching': True,
- },
- {
- 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
- 'only_matching': True,
- },
- {
- # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- # Also tests cut-off URL expansion in video description (see
- # https://github.com/ytdl-org/youtube-dl/issues/1892,
- # https://github.com/ytdl-org/youtube-dl/issues/8164)
- 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
- 'info_dict': {
- 'id': 'lsguqyKfVQg',
- 'ext': 'mp4',
- 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
- 'alt_title': 'Dark Walk - Position Music',
- 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
- 'duration': 133,
- 'upload_date': '20151119',
- 'uploader_id': 'IronSoulElf',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
- 'uploader': 'IronSoulElf',
- 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'track': 'Dark Walk - Position Music',
- 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
- 'only_matching': True,
- },
- {
- # Video with yt:stretch=17:0
- 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
- 'info_dict': {
- 'id': 'Q39EVAstoRM',
- 'ext': 'mp4',
- 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
- 'description': 'md5:ee18a25c350637c8faff806845bddee9',
- 'upload_date': '20151107',
- 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
- 'uploader': 'CH GAMER DROID',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video does not exist.',
- },
- {
- # Video licensed under Creative Commons
- 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
- 'info_dict': {
- 'id': 'M4gD1WSo5mA',
- 'ext': 'mp4',
- 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
- 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
- 'duration': 721,
- 'upload_date': '20150127',
- 'uploader_id': 'BerkmanCenter',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
- 'uploader': 'The Berkman Klein Center for Internet & Society',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Channel-like uploader_url
- 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
- 'info_dict': {
- 'id': 'eQcmzGIKrzg',
- 'ext': 'mp4',
- 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
- 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
- 'duration': 4060,
- 'upload_date': '20151119',
- 'uploader': 'Bernie Sanders',
- 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
- 'only_matching': True,
- },
- {
- # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
- 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
- 'only_matching': True,
- },
- {
- # Rental video preview
- 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
- 'info_dict': {
- 'id': 'uGpuVWrhIzE',
- 'ext': 'mp4',
- 'title': 'Piku - Trailer',
- 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
- 'upload_date': '20150811',
- 'uploader': 'FlixMatrix',
- 'uploader_id': 'FlixMatrixKaravan',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
- 'license': 'Standard YouTube License',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # YouTube Red video with episode data
- 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
- 'info_dict': {
- 'id': 'iqKdEhx-dD4',
- 'ext': 'mp4',
- 'title': 'Isolation - Mind Field (Ep 1)',
- 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
- 'duration': 2085,
- 'upload_date': '20170118',
- 'uploader': 'Vsauce',
- 'uploader_id': 'Vsauce',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
- 'series': 'Mind Field',
- 'season_number': 1,
- 'episode_number': 1,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': [
- 'Skipping DASH manifest',
- ],
- },
- {
- # The following content has been identified by the YouTube community
- # as inappropriate or offensive to some audiences.
- 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
- 'info_dict': {
- 'id': '6SJNVb0GnPI',
- 'ext': 'mp4',
- 'title': 'Race Differences in Intelligence',
- 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
- 'duration': 965,
- 'upload_date': '20140124',
- 'uploader': 'New Century Foundation',
- 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # itag 212
- 'url': '1t24XAntNCY',
- 'only_matching': True,
- },
- {
- # geo restricted to JP
- 'url': 'sJL6WA-aGkQ',
- 'only_matching': True,
- },
- {
- 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
- 'only_matching': True,
- },
- {
- 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
- 'only_matching': True,
- },
- {
- # DRM protected
- 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
- 'only_matching': True,
- },
- {
- # Video with unsupported adaptive stream type formats
- 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
- 'info_dict': {
- 'id': 'Z4Vy8R84T1U',
- 'ext': 'mp4',
- 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'duration': 433,
- 'upload_date': '20130923',
- 'uploader': 'Amelia Putri Harwita',
- 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
- 'formats': 'maxcount:10',
- },
- 'params': {
- 'skip_download': True,
- 'youtube_include_dash_manifest': False,
- },
- },
- {
- # Youtube Music Auto-generated description
- 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
- 'info_dict': {
- 'id': 'MgNrAu2pzNs',
- 'ext': 'mp4',
- 'title': 'Voyeur Girl',
- 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
- 'upload_date': '20190312',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
- 'artist': 'Stephen',
- 'track': 'Voyeur Girl',
- 'album': 'it\'s too much love to know my dear',
- 'release_date': '20190313',
- 'release_year': 2019,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # Retrieve 'artist' field from 'Artist:' in video description
- # when it is present on youtube music video
- 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
- 'info_dict': {
- 'id': 'k0jLE7tTwjY',
- 'ext': 'mp4',
- 'title': 'Latch Feat. Sam Smith',
- 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
- 'upload_date': '20150110',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
- 'artist': 'Disclosure',
- 'track': 'Latch Feat. Sam Smith',
- 'album': 'Latch Featuring Sam Smith',
- 'release_date': '20121008',
- 'release_year': 2012,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle multiple artists on youtube music video
- 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
- 'info_dict': {
- 'id': '74qn0eJSjpA',
- 'ext': 'mp4',
- 'title': 'Eastside',
- 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
- 'upload_date': '20180710',
- 'uploader': 'Benny Blanco - Topic',
- 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
- 'artist': 'benny blanco, Halsey, Khalid',
- 'track': 'Eastside',
- 'album': 'Eastside',
- 'release_date': '20180713',
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle youtube music video with release_year and no release_date
- 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
- 'info_dict': {
- 'id': '-hcAI0g-f5M',
- 'ext': 'mp4',
- 'title': 'Put It On Me',
- 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
- 'upload_date': '20180426',
- 'uploader': 'Matt Maeson - Topic',
- 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
- 'artist': 'Matt Maeson',
- 'track': 'Put It On Me',
- 'album': 'The Hearse',
- 'release_date': None,
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- ]
-
- def __init__(self, *args, **kwargs):
- super(YoutubeIE, self).__init__(*args, **kwargs)
- self._player_cache = {}
-
- def report_video_info_webpage_download(self, video_id):
- """Report attempt to download video info webpage."""
- self.to_screen('%s: Downloading video info webpage' % video_id)
-
- def report_information_extraction(self, video_id):
- """Report attempt to extract video information."""
- self.to_screen('%s: Extracting video information' % video_id)
-
- def report_unavailable_format(self, video_id, format):
- """Report extracted video URL."""
- self.to_screen('%s: Format %s not available' % (video_id, format))
-
- def report_rtmp_download(self):
- """Indicate the download will use the RTMP protocol."""
- self.to_screen('RTMP download detected')
-
- def _signature_cache_id(self, example_sig):
- """ Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
-
- def _extract_signature_function(self, video_id, player_url, example_sig):
- id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
- player_url)
- if not id_m:
- raise ExtractorError('Cannot identify player %r' % player_url)
- player_type = id_m.group('ext')
- player_id = id_m.group('id')
-
- # Read from filesystem cache
- func_id = '%s_%s_%s' % (
- player_type, player_id, self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
- if cache_spec is not None:
- return lambda s: ''.join(s[i] for i in cache_spec)
-
- download_note = (
- 'Downloading player %s' % player_url
- if self._downloader.params.get('verbose') else
- 'Downloading %s player %s' % (player_type, player_id)
- )
- if player_type == 'js':
- code = self._download_webpage(
- player_url, video_id,
- note=download_note,
- errnote='Download of %s failed' % player_url)
- res = self._parse_sig_js(code)
- elif player_type == 'swf':
- urlh = self._request_webpage(
- player_url, video_id,
- note=download_note,
- errnote='Download of %s failed' % player_url)
- code = urlh.read()
- res = self._parse_sig_swf(code)
- else:
- assert False, 'Invalid player type %r' % player_type
-
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
-
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
-
- def _print_sig_code(self, func, example_sig):
- def gen_sig_code(idxs):
- def _genslice(start, end, step):
- starts = '' if start == 0 else str(start)
- ends = (':%d' % (end + step)) if end + step >= 0 else ':'
- steps = '' if step == 1 else (':%d' % step)
- return 's[%s%s%s]' % (starts, ends, steps)
-
- step = None
- # Quelch pyflakes warnings - start will be set when step is set
- start = '(Never used)'
- for i, prev in zip(idxs[1:], idxs[:-1]):
- if step is not None:
- if i - prev == step:
- continue
- yield _genslice(start, prev, step)
- step = None
- continue
- if i - prev in [-1, 1]:
- step = i - prev
- start = prev
- continue
- else:
- yield 's[%d]' % prev
- if step is None:
- yield 's[%d]' % i
- else:
- yield _genslice(start, i, step)
-
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = func(test_string)
- cache_spec = [ord(c) for c in cache_res]
- expr_code = ' + '.join(gen_sig_code(cache_spec))
- signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
- code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
- ' return %s\n') % (signature_id_tuple, expr_code)
- self.to_screen('Extracted signature function:\n' + code)
-
- def _parse_sig_js(self, jscode):
- funcname = self._search_regex(
- (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
- # Obsolete patterns
- r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
- jscode, 'Initial JS player signature function name', group='sig')
-
- jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname)
- return lambda s: initial_function([s])
-
- def _parse_sig_swf(self, file_contents):
- swfi = SWFInterpreter(file_contents)
- TARGET_CLASSNAME = 'SignatureDecipher'
- searched_class = swfi.extract_class(TARGET_CLASSNAME)
- initial_function = swfi.extract_function(searched_class, 'decipher')
- return lambda s: initial_function([s])
-
- def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
- """Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
- if player_url.startswith('//'):
- player_url = 'https:' + player_url
- elif not re.match(r'https?://', player_url):
- player_url = compat_urlparse.urljoin(
- 'https://www.youtube.com', player_url)
- try:
- player_id = (player_url, self._signature_cache_id(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, s)
- return func(s)
- except Exception as e:
- tb = traceback.format_exc()
- raise ExtractorError(
- 'Signature extraction failed: ' + tb, cause=e)
-
- def _get_subtitles(self, video_id, webpage):
- try:
- subs_doc = self._download_xml(
- 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
- video_id, note=False)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
- return {}
-
- sub_lang_list = {}
- for track in subs_doc.findall('track'):
- lang = track.attrib['lang_code']
- if lang in sub_lang_list:
- continue
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse_urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': ext,
- 'name': track.attrib['name'].encode('utf-8'),
- })
- sub_formats.append({
- 'url': 'https://www.youtube.com/api/timedtext?' + params,
- 'ext': ext,
- })
- sub_lang_list[lang] = sub_formats
- if not sub_lang_list:
- self._downloader.report_warning('video doesn\'t have subtitles')
- return {}
- return sub_lang_list
-
- def _get_ytplayer_config(self, video_id, webpage):
- patterns = (
- # User data may contain arbitrary character sequences that may affect
- # JSON extraction with regex, e.g. when '};' is contained the second
- # regex won't capture the whole JSON. Yet working around by trying more
- # concrete regex first keeping in mind proper quoted string handling
- # to be implemented in future that will replace this workaround (see
- # https://github.com/ytdl-org/youtube-dl/issues/7468,
- # https://github.com/ytdl-org/youtube-dl/pull/7599)
- r';ytplayer\.config\s*=\s*({.+?});ytplayer',
- r';ytplayer\.config\s*=\s*({.+?});',
- )
- config = self._search_regex(
- patterns, webpage, 'ytplayer.config', default=None)
- if config:
- return self._parse_json(
- uppercase_escape(config), video_id, fatal=False)
-
- def _get_automatic_captions(self, video_id, webpage):
- """We need the webpage for getting the captions url, pass it as an
- argument to speed up the process."""
- self.to_screen('%s: Looking for automatic captions' % video_id)
- player_config = self._get_ytplayer_config(video_id, webpage)
- err_msg = 'Couldn\'t find automatic captions for %s' % video_id
- if not player_config:
- self._downloader.report_warning(err_msg)
- return {}
- try:
- args = player_config['args']
- caption_url = args.get('ttsurl')
- if caption_url:
- timestamp = args['timestamp']
- # We get the available subtitles
- list_params = compat_urllib_parse_urlencode({
- 'type': 'list',
- 'tlangs': 1,
- 'asrs': 1,
- })
- list_url = caption_url + '&' + list_params
- caption_list = self._download_xml(list_url, video_id)
- original_lang_node = caption_list.find('track')
- if original_lang_node is None:
- self._downloader.report_warning('Video doesn\'t have automatic captions')
- return {}
- original_lang = original_lang_node.attrib['lang_code']
- caption_kind = original_lang_node.attrib.get('kind', '')
-
- sub_lang_list = {}
- for lang_node in caption_list.findall('target'):
- sub_lang = lang_node.attrib['lang_code']
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse_urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': ext,
- 'ts': timestamp,
- 'kind': caption_kind,
- })
- sub_formats.append({
- 'url': caption_url + '&' + params,
- 'ext': ext,
- })
- sub_lang_list[sub_lang] = sub_formats
- return sub_lang_list
-
- def make_captions(sub_url, sub_langs):
- parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
- caption_qs = compat_parse_qs(parsed_sub_url.query)
- captions = {}
- for sub_lang in sub_langs:
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- caption_qs.update({
- 'tlang': [sub_lang],
- 'fmt': [ext],
- })
- sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
- query=compat_urllib_parse_urlencode(caption_qs, True)))
- sub_formats.append({
- 'url': sub_url,
- 'ext': ext,
- })
- captions[sub_lang] = sub_formats
- return captions
-
- # New captions format as of 22.06.2017
- player_response = args.get('player_response')
- if player_response and isinstance(player_response, compat_str):
- player_response = self._parse_json(
- player_response, video_id, fatal=False)
- if player_response:
- renderer = player_response['captions']['playerCaptionsTracklistRenderer']
- base_url = renderer['captionTracks'][0]['baseUrl']
- sub_lang_list = []
- for lang in renderer['translationLanguages']:
- lang_code = lang.get('languageCode')
- if lang_code:
- sub_lang_list.append(lang_code)
- return make_captions(base_url, sub_lang_list)
-
- # Some videos don't provide ttsurl but rather caption_tracks and
- # caption_translation_languages (e.g. 20LmZk1hakA)
- # Does not used anymore as of 22.06.2017
- caption_tracks = args['caption_tracks']
- caption_translation_languages = args['caption_translation_languages']
- caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- sub_lang_list = []
- for lang in caption_translation_languages.split(','):
- lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
- sub_lang = lang_qs.get('lc', [None])[0]
- if sub_lang:
- sub_lang_list.append(sub_lang)
- return make_captions(caption_url, sub_lang_list)
- # An extractor error can be raise by the download process if there are
- # no automatic captions but there are subtitles
- except (KeyError, IndexError, ExtractorError):
- self._downloader.report_warning(err_msg)
- return {}
-
- def _mark_watched(self, video_id, video_info, player_response):
- playback_url = url_or_none(try_get(
- player_response,
- lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
- video_info, lambda x: x['videostats_playback_base_url'][0]))
- if not playback_url:
- return
- parsed_playback_url = compat_urlparse.urlparse(playback_url)
- qs = compat_urlparse.parse_qs(parsed_playback_url.query)
-
- # cpn generation algorithm is reverse engineered from base.js.
- # In fact it works even with dummy cpn.
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
-
- qs.update({
- 'ver': ['2'],
- 'cpn': [cpn],
- })
- playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
- self._download_webpage(
- playback_url, video_id, 'Marking watched',
- 'Unable to mark watched', fatal=False)
-
- @staticmethod
- def _extract_urls(webpage):
- # Embedded YouTube player
- entries = [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
- \1''', webpage)]
-
- # lazyYT YouTube embed
- entries.extend(list(map(
- unescapeHTML,
- re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
-
- # Wordpress "YouTube Video Importer" plugin
- matches = re.findall(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
- data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
- entries.extend(m[-1] for m in matches)
-
- return entries
-
- @staticmethod
- def _extract_url(webpage):
- urls = YoutubeIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @classmethod
- def extract_id(cls, url):
- mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- video_id = mobj.group(2)
- return video_id
-
- @staticmethod
- def _extract_chapters(description, duration):
- if not description:
- return None
- chapter_lines = re.findall(
- r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
- description)
- if not chapter_lines:
- return None
- chapters = []
- for next_num, (chapter_line, time_point) in enumerate(
- chapter_lines, start=1):
- start_time = parse_duration(time_point)
- if start_time is None:
- continue
- if start_time > duration:
- break
- end_time = (duration if next_num == len(chapter_lines)
- else parse_duration(chapter_lines[next_num][1]))
- if end_time is None:
- continue
- if end_time > duration:
- end_time = duration
- if start_time > end_time:
- break
- chapter_title = re.sub(
- r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
- chapter_title = re.sub(r'\s+', ' ', chapter_title)
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': chapter_title,
- })
- return chapters
-
- ul_tag_pattern = re.compile(r'(</?ul)')
- music_info_pattern = re.compile(r'<h4 class="title">\s*(Song|Music|Artist|Album)\s*</h4>\s*<ul class="content watch-info-tag-list">\s*<li>(?:<a[^>]*>)?([^<]*)(?:</a>)?</li>')
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- proto = (
- 'http' if self._downloader.params.get('prefer_insecure', False)
- else 'https')
-
- start_time = None
- end_time = None
- parsed_url = compat_urllib_parse_urlparse(url)
- for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
- if start_time is None and 't' in query:
- start_time = parse_duration(query['t'][0])
- if start_time is None and 'start' in query:
- start_time = parse_duration(query['start'][0])
- if end_time is None and 'end' in query:
- end_time = parse_duration(query['end'][0])
-
- # Extract original video URL from URL with redirection, like age verification, using next_url parameter
- mobj = re.search(self._NEXT_URL_RE, url)
- if mobj:
- url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
- video_id = self.extract_id(url)
-
- # Get video webpage
- url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- video_webpage = self._download_webpage(url, video_id)
-
- # Attempt to extract SWF player URL
- mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
- if mobj is not None:
- player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
- else:
- player_url = None
-
- dash_mpds = []
-
- def add_dash_mpd(video_info):
- dash_mpd = video_info.get('dashmpd')
- if dash_mpd and dash_mpd[0] not in dash_mpds:
- dash_mpds.append(dash_mpd[0])
-
- def add_dash_mpd_pr(pl_response):
- dash_mpd = url_or_none(try_get(
- pl_response, lambda x: x['streamingData']['dashManifestUrl'],
- compat_str))
- if dash_mpd and dash_mpd not in dash_mpds:
- dash_mpds.append(dash_mpd)
-
- is_live = None
- view_count = None
-
- def extract_view_count(v_info):
- return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
-
- def extract_token(v_info):
- return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
- def extract_player_response(player_response, video_id):
- pl_response = str_or_none(player_response)
- if not pl_response:
- return
- pl_response = self._parse_json(pl_response, video_id, fatal=False)
- if isinstance(pl_response, dict):
- add_dash_mpd_pr(pl_response)
- return pl_response
-
- player_response = {}
-
-
-
- # Is it unlisted?
- unlisted = ('<span id="watch-privacy-icon"' in video_webpage)
-
-
- # Related videos
- related_vids = []
- try:
- rvs_match = re.search(r'"rvs":"(.*?)[^\\]"', video_webpage)
- if rvs_match is not None:
- rvs = json.loads('"' + rvs_match.group(1) + '"') # unescape json string (\u0026 for example)
- related_vid_parts = (compat_parse_qs(related_item) for related_item in rvs.split(","))
- related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vid_parts]
- else:
- print('Failed to extract related videos: no rvs')
-
- except Exception:
- print('Error while extracting related videos:')
- traceback.print_exc()
-
-
- # Music list
- # Test case: https://www.youtube.com/watch?v=jbkZdRglnKY
- music_list = []
- metadata_start = video_webpage.find('<ul class="watch-extras-section">')
- if metadata_start != -1:
- metadata_start += 33
- tag_index = metadata_start
- open_tags = 1
- while open_tags > 0:
- match = self.ul_tag_pattern.search(video_webpage, tag_index)
- if match is None:
- print("Couldn't match ul tag")
- break
- tag_index = match.end()
- tag = match.group(1)
- if tag == "<ul":
- open_tags += 1
- else:
- open_tags -= 1
- else:
- last_index = 0
- metadata = video_webpage[metadata_start:tag_index]
- current_song = None
- while True:
- match = self.music_info_pattern.search(metadata, last_index)
- if match is None:
- if current_song is not None:
- music_list.append(current_song)
- break
- title, value = match.group(1), html.unescape(match.group(2))
- if title in ("Song", "Music"):
- if current_song is not None:
- music_list.append(current_song)
- current_song = {"title": value}
- else:
- current_song[title.lower()] = value
- last_index = match.end()
-
-
-
- # Get video info
- embed_webpage = None
- if re.search(r'player-age-gate-content">', video_webpage) is not None:
- age_gate = True
- # We simulate the access to the video from www.youtube.com/v/{video_id}
- # this can be viewed without login into Youtube
- url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
- data = compat_urllib_parse_urlencode({
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts': self._search_regex(
- r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
- })
- video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
- video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- pl_response = video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(video_info)
- view_count = extract_view_count(video_info)
- else:
- age_gate = False
- video_info = None
- sts = None
- # Try looking directly into the video webpage
- ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
- if ytplayer_config:
- args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
- # Convert to the same format returned by compat_parse_qs
- video_info = dict((k, [v]) for k, v in args.items())
- add_dash_mpd(video_info)
- # Rental video is not rented but preview is available (e.g.
- # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/ytdl-org/youtube-dl/issues/10532)
- if not video_info and args.get('ypc_vid'):
- return self.url_result(
- args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
- if args.get('livestream') == '1' or args.get('live_playback') == 1:
- is_live = True
- sts = ytplayer_config.get('sts')
- if not player_response:
- player_response = extract_player_response(args.get('player_response'), video_id)
- if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
- add_dash_mpd_pr(player_response)
- # We also try looking in get_video_info since it may contain different dashmpd
- # URL that points to a DASH manifest with possibly different itag set (some itags
- # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
- # manifest pointed by get_video_info's dashmpd).
- # The general idea is to take a union of itags of both DASH manifests (for example
- # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
- self.report_video_info_webpage_download(video_id)
- for el in ('embedded', 'detailpage', 'vevo', ''):
- query = {
- 'video_id': video_id,
- 'ps': 'default',
- 'eurl': '',
- 'gl': 'US',
- 'hl': 'en',
- }
- if el:
- query['el'] = el
- if sts:
- query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
- if not video_info_webpage:
- continue
- get_video_info = compat_parse_qs(video_info_webpage)
- if not player_response:
- pl_response = get_video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(get_video_info)
- if view_count is None:
- view_count = extract_view_count(get_video_info)
- if not video_info:
- video_info = get_video_info
- get_token = extract_token(get_video_info)
- if get_token:
- # Different get_video_info requests may report different results, e.g.
- # some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
- # the original webpage as well as el=info and el=embedded get_video_info
- # requests report video unavailability due to geo restriction while
- # el=detailpage succeeds and returns valid data). This is probably
- # due to YouTube measures against IP ranges of hosting providers.
- # Working around by preferring the first succeeded video_info containing
- # the token if no such video_info yet was found.
- token = extract_token(video_info)
- if not token:
- video_info = get_video_info
- break
-
- def extract_unavailable_message():
- messages = []
- for tag, kind in (('h1', 'message'), ('div', 'submessage')):
- msg = self._html_search_regex(
- r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
- video_webpage, 'unavailable %s' % kind, default=None)
- if msg:
- messages.append(msg)
- if messages:
- return '\n'.join(messages)
-
- if not video_info:
- unavailable_message = extract_unavailable_message()
- if not unavailable_message:
- unavailable_message = 'Unable to extract video data'
- raise ExtractorError(
- 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
-
- video_details = try_get(
- player_response, lambda x: x['videoDetails'], dict) or {}
-
- video_title = video_info.get('title', [None])[0] or video_details.get('title')
- if not video_title:
- self._downloader.report_warning('Unable to extract video title')
- video_title = '_'
-
- description_original = video_description = get_element_by_id("eow-description", video_webpage)
- if video_description:
-
- def replace_url(m):
- redir_url = compat_urlparse.urljoin(url, m.group(1))
- parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
- if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
- qs = compat_parse_qs(parsed_redir_url.query)
- q = qs.get('q')
- if q and q[0]:
- return q[0]
- return redir_url
-
- description_original = video_description = re.sub(r'''(?x)
- <a\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- (?:title|href)="([^"]+)"\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- class="[^"]*"[^>]*>
- [^<]+\.{3}\s*
- </a>
- ''', replace_url, video_description)
- video_description = clean_html(video_description)
- else:
- video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
-
- if not smuggled_data.get('force_singlefeed', False):
- if not self._downloader.params.get('noplaylist'):
- multifeed_metadata_list = try_get(
- player_response,
- lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
- compat_str) or try_get(
- video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
- if multifeed_metadata_list:
- entries = []
- feed_ids = []
- for feed in multifeed_metadata_list.split(','):
- # Unquote should take place before split on comma (,) since textual
- # fields may contain comma as well (see
- # https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
- {'force_singlefeed': True}),
- 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
- })
- feed_ids.append(feed_data['id'][0])
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(entries, video_id, video_title, video_description)
- else:
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-
- if view_count is None:
- view_count = extract_view_count(video_info)
- if view_count is None and video_details:
- view_count = int_or_none(video_details.get('viewCount'))
-
- if is_live is None:
- is_live = bool_or_none(video_details.get('isLive'))
-
- # Check for "rental" videos
- if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
-
- def _extract_filesize(media_url):
- return int_or_none(self._search_regex(
- r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
-
- streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
- streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
-
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
- if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
- formats = []
- formats_spec = {}
- fmt_list = video_info.get('fmt_list', [''])[0]
- if fmt_list:
- for fmt in fmt_list.split(','):
- spec = fmt.split('/')
- if len(spec) > 1:
- width_height = spec[1].split('x')
- if len(width_height) == 2:
- formats_spec[spec[0]] = {
- 'resolution': spec[1],
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- }
- for fmt in streaming_formats:
- itag = str_or_none(fmt.get('itag'))
- if not itag:
- continue
- quality = fmt.get('quality')
- quality_label = fmt.get('qualityLabel') or quality
- formats_spec[itag] = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_note': quality_label,
- 'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
- # bitrate for itag 43 is always 2147483647
- 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
- 'width': int_or_none(fmt.get('width')),
- }
-
- for fmt in streaming_formats:
- if fmt.get('drm_families'):
- continue
- url = url_or_none(fmt.get('url'))
-
- if not url:
- cipher = fmt.get('cipher')
- if not cipher:
- continue
- url_data = compat_parse_qs(cipher)
- url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
- if not url:
- continue
- else:
- cipher = None
- url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-
- stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
- # Unsupported FORMAT_STREAM_TYPE_OTF
- if stream_type == 3:
- continue
-
- format_id = fmt.get('itag') or url_data['itag'][0]
- if not format_id:
- continue
- format_id = compat_str(format_id)
-
- if cipher:
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
-
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
- url += '&%s=%s' % (sp, signature)
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
-
- dct = {
- 'format_id': format_id,
- 'url': url,
- 'player_url': player_url,
- }
- if format_id in self._formats:
- dct.update(self._formats[format_id])
- if format_id in formats_spec:
- dct.update(formats_spec[format_id])
-
- # Some itags are not included in DASH manifest thus corresponding formats will
- # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
- # Trying to extract metadata from url_encoded_fmt_stream_map entry.
- mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
- width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
-
- if width is None:
- width = int_or_none(fmt.get('width'))
- if height is None:
- height = int_or_none(fmt.get('height'))
-
- filesize = int_or_none(url_data.get(
- 'clen', [None])[0]) or _extract_filesize(url)
-
- quality = url_data.get('quality', [None])[0] or fmt.get('quality')
- quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
-
- tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
- or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
- fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
-
- more_fields = {
- 'filesize': filesize,
- 'tbr': tbr,
- 'width': width,
- 'height': height,
- 'fps': fps,
- 'format_note': quality_label or quality,
- }
- for key, value in more_fields.items():
- if value:
- dct[key] = value
- type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
- if type_:
- type_split = type_.split(';')
- kind_ext = type_split[0].split('/')
- if len(kind_ext) == 2:
- kind, _ = kind_ext
- dct['ext'] = mimetype2ext(type_split[0])
- if kind in ('audio', 'video'):
- codecs = None
- for mobj in re.finditer(
- r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
- if mobj.group('key') == 'codecs':
- codecs = mobj.group('val')
- break
- if codecs:
- dct.update(parse_codecs(codecs))
- if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
- dct['downloader_options'] = {
- # Youtube throttles chunks >~10M
- 'http_chunk_size': 10485760,
- }
- formats.append(dct)
- else:
- manifest_url = (
- url_or_none(try_get(
- player_response,
- lambda x: x['streamingData']['hlsManifestUrl'],
- compat_str))
- or url_or_none(try_get(
- video_info, lambda x: x['hlsvp'][0], compat_str)))
- if manifest_url:
- formats = []
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', fatal=False)
- for a_format in m3u8_formats:
- itag = self._search_regex(
- r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
- if itag:
- a_format['format_id'] = itag
- if itag in self._formats:
- dct = self._formats[itag].copy()
- dct.update(a_format)
- a_format = dct
- a_format['player_url'] = player_url
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
- formats.append(a_format)
- else:
- error_message = extract_unavailable_message()
- alt_error_message = clean_html(video_info.get('reason', [None])[0])
- print(alt_error_message)
- if not error_message:
- error_message = alt_error_message
- if not error_message:
- error_message = clean_html(
- try_get(video_info, lambda x: x['reason'][0], compat_str))
- if error_message:
- raise YoutubeError(error_message)
- raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
-
- # uploader
- video_uploader = try_get(
- video_info, lambda x: x['author'][0],
- compat_str) or str_or_none(video_details.get('author'))
- if video_uploader:
- video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
- else:
- self._downloader.report_warning('unable to extract uploader name')
-
- # uploader_id
- video_uploader_id = None
- video_uploader_url = None
- mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
- video_webpage)
- if mobj is not None:
- video_uploader_id = mobj.group('uploader_id')
- video_uploader_url = mobj.group('uploader_url')
- else:
- self._downloader.report_warning('unable to extract uploader nickname')
-
- channel_id = (
- str_or_none(video_details.get('channelId'))
- or self._html_search_meta(
- 'channelId', video_webpage, 'channel id', default=None)
- or self._search_regex(
- r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- video_webpage, 'channel id', default=None, group='id'))
- channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
-
- # thumbnail image
- # We try first to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
- video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
- # upload date
- upload_date = self._html_search_meta(
- 'datePublished', video_webpage, 'upload date', default=None)
- if not upload_date:
- upload_date = self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
- video_webpage, 'upload date', default=None)
- upload_date = unified_strdate(upload_date)
-
- video_license = self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
- video_webpage, 'license', default=None)
-
- m_music = re.search(
- r'''(?x)
- <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
- <ul[^>]*>\s*
- <li>(?P<title>.+?)
- by (?P<creator>.+?)
- (?:
- \(.+?\)|
- <a[^>]*
- (?:
- \bhref=["\']/red[^>]*>| # drop possible
- >\s*Listen ad-free with YouTube Red # YouTube Red ad
- )
- .*?
- )?</li
- ''',
- video_webpage)
- if m_music:
- video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
- video_creator = clean_html(m_music.group('creator'))
- else:
- video_alt_title = video_creator = None
-
- def extract_meta(field):
- return self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
- video_webpage, field, default=None)
-
- track = extract_meta('Song')
- artist = extract_meta('Artist')
- album = extract_meta('Album')
-
- # Youtube Music Auto-generated description
- release_date = release_year = None
- if video_description:
- mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
- if mobj:
- if not track:
- track = mobj.group('track').strip()
- if not artist:
- artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
- if not album:
- album = mobj.group('album'.strip())
- release_year = mobj.group('release_year')
- release_date = mobj.group('release_date')
- if release_date:
- release_date = release_date.replace('-', '')
- if not release_year:
- release_year = int(release_date[:4])
- if release_year:
- release_year = int(release_year)
-
- m_episode = re.search(
- r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
- video_webpage)
- if m_episode:
- series = unescapeHTML(m_episode.group('series'))
- season_number = int(m_episode.group('season'))
- episode_number = int(m_episode.group('episode'))
- else:
- series = season_number = episode_number = None
-
- m_cat_container = self._search_regex(
- r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', default=None)
- if m_cat_container:
- category = self._html_search_regex(
- r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
- default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
-
- video_tags = [
- unescapeHTML(m.group('content'))
- for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
- def _extract_count(count_name):
- return str_to_int(self._search_regex(
- r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
- % re.escape(count_name),
- video_webpage, count_name, default=None))
-
- like_count = _extract_count('like')
- dislike_count = _extract_count('dislike')
-
- if view_count is None:
- view_count = str_to_int(self._search_regex(
- r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
- 'view count', default=None))
-
- average_rating = (
- float_or_none(video_details.get('averageRating'))
- or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
-
- # subtitles
- video_subtitles = self._get_subtitles(video_id, video_webpage)
- automatic_captions = self._get_automatic_captions(video_id, video_webpage)
-
- video_duration = try_get(
- video_info, lambda x: int_or_none(x['length_seconds'][0]))
- if not video_duration:
- video_duration = int_or_none(video_details.get('lengthSeconds'))
- if not video_duration:
- video_duration = parse_duration(self._html_search_meta(
- 'duration', video_webpage, 'video duration'))
-
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
- xsrf_token = self._search_regex(
- r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
- video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
- invideo_url = try_get(
- player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
- if xsrf_token and invideo_url:
- xsrf_field_name = self._search_regex(
- r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
- video_webpage, 'xsrf field name',
- group='xsrf_field_name', default='session_token')
- video_annotations = self._download_webpage(
- self._proto_relative_url(invideo_url),
- video_id, note='Downloading annotations',
- errnote='Unable to download video annotations', fatal=False,
- data=urlencode_postdata({xsrf_field_name: xsrf_token}))
-
- chapters = self._extract_chapters(description_original, video_duration)
-
- # Look for the DASH manifest
- if self._downloader.params.get('youtube_include_dash_manifest', True):
- dash_mpd_fatal = True
- for mpd_url in dash_mpds:
- dash_formats = {}
- try:
- def decrypt_sig(mobj):
- s = mobj.group(1)
- dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
- return '/signature/%s' % dec_s
-
- mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
-
- for df in self._extract_mpd_formats(
- mpd_url, video_id, fatal=dash_mpd_fatal,
- formats_dict=self._formats):
- if not df.get('filesize'):
- df['filesize'] = _extract_filesize(df['url'])
- # Do not overwrite DASH format found in some previous DASH manifest
- if df['format_id'] not in dash_formats:
- dash_formats[df['format_id']] = df
- # Additional DASH manifests may end up in HTTP Error 403 therefore
- # allow them to fail without bug report message if we already have
- # some DASH manifest succeeded. This is temporary workaround to reduce
- # burst of bug reports until we figure out the reason and whether it
- # can be fixed at all.
- dash_mpd_fatal = False
- except (ExtractorError, KeyError) as e:
- self.report_warning(
- 'Skipping DASH manifest: %r' % e, video_id)
- if dash_formats:
- # Remove the formats we found through non-DASH, they
- # contain less info and it can be wrong, because we use
- # fixed values (for example the resolution). See
- # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
- # example.
- formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
- formats.extend(dash_formats.values())
-
- # Check for malformed aspect ratio
- stretched_m = re.search(
- r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
- video_webpage)
- if stretched_m:
- w = float(stretched_m.group('w'))
- h = float(stretched_m.group('h'))
- # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
- # We will only process correct ratios.
- if w > 0 and h > 0:
- ratio = w / h
- for f in formats:
- if f.get('vcodec') != 'none':
- f['stretched_ratio'] = ratio
-
- if not formats:
- token = extract_token(video_info)
- if not token:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise YoutubeError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
-
- if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- self._sort_formats(formats)
-
- self.mark_watched(video_id, video_info, player_response)
-
- return {
- 'id': video_id,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
- 'uploader_url': video_uploader_url,
- 'channel_id': channel_id,
- 'channel_url': channel_url,
- 'upload_date': upload_date,
- 'license': video_license,
- 'creator': video_creator or artist,
- 'title': video_title,
- 'alt_title': video_alt_title or track,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'categories': video_categories,
- 'tags': video_tags,
- 'subtitles': video_subtitles,
- 'automatic_captions': automatic_captions,
- 'duration': video_duration,
- 'age_limit': 18 if age_gate else 0,
- 'annotations': video_annotations,
- 'chapters': chapters,
- 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'average_rating': average_rating,
- 'formats': formats,
- 'is_live': is_live,
- 'start_time': start_time,
- 'end_time': end_time,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'track': track,
- 'artist': artist,
- 'album': album,
- 'release_date': release_date,
- 'release_year': release_year,
- 'related_vids': related_vids,
- 'music_list': music_list,
- 'unlisted': unlisted,
- }
-
-
-class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com playlists'
- _VALID_URL = r"""(?x)(?:
- (?:https?://)?
- (?:\w+\.)?
- (?:
- (?:
- youtube\.com|
- invidio\.us
- )
- /
- (?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
- \? (?:.*?[&;])*? (?:p|a|list)=
- | p/
- )|
- youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
- )
- (
- (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
- # Top tracks, they can also include dots
- |(?:MC)[\w\.]*
- )
- .*
- |
- (%(playlist_id)s)
- )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
- _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
- _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
- IE_NAME = 'youtube:playlist'
- _TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
- 'info_dict': {
- 'title': 'ytdl test PL',
- 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
- },
- 'playlist_count': 3,
- }, {
- 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'info_dict': {
- 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'title': 'YDL_Empty_List',
- },
- 'playlist_count': 0,
- 'skip': 'This playlist is private',
- }, {
- 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
- 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
- 'info_dict': {
- 'title': '29C3: Not my department',
- 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
- 'uploader': 'Christiaan008',
- 'uploader_id': 'ChRiStIaAn008',
- },
- 'playlist_count': 95,
- }, {
- 'note': 'issue #673',
- 'url': 'PLBB231211A4F62143',
- 'info_dict': {
- 'title': '[OLD]Team Fortress 2 (Class-based LP)',
- 'id': 'PLBB231211A4F62143',
- 'uploader': 'Wickydoo',
- 'uploader_id': 'Wickydoo',
- },
- 'playlist_mincount': 26,
- }, {
- 'note': 'Large playlist',
- 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
- 'info_dict': {
- 'title': 'Uploads from Cauchemar',
- 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
- 'uploader': 'Cauchemar',
- 'uploader_id': 'Cauchemar89',
- },
- 'playlist_mincount': 799,
- }, {
- 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
- 'info_dict': {
- 'title': 'YDL_safe_search',
- 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
- },
- 'playlist_count': 2,
- 'skip': 'This playlist is private',
- }, {
- 'note': 'embedded',
- 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'playlist_count': 4,
- 'info_dict': {
- 'title': 'JODA15',
- 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'uploader': 'milan',
- 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
- }
- }, {
- 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 485,
- 'info_dict': {
- 'title': '2018 Chinese New Singles (11/6 updated)',
- 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'uploader': 'LBK',
- 'uploader_id': 'sdragonfang',
- }
- }, {
- 'note': 'Embedded SWF player',
- 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
- 'playlist_count': 4,
- 'info_dict': {
- 'title': 'JODA7',
- 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
- },
- 'skip': 'This playlist does not exist',
- }, {
- 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
- 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
- 'info_dict': {
- 'title': 'Uploads from Interstellar Movie',
- 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
- 'uploader': 'Interstellar Movie',
- 'uploader_id': 'InterstellarMovie1',
- },
- 'playlist_mincount': 21,
- }, {
- # Playlist URL that does not actually serve a playlist
- 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
- 'info_dict': {
- 'id': 'FqZTN594JQw',
- 'ext': 'webm',
- 'title': "Smiley's People 01 detective, Adventure Series, Action",
- 'uploader': 'STREEM',
- 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
- 'upload_date': '20150526',
- 'license': 'Standard YouTube License',
- 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
- 'categories': ['People & Blogs'],
- 'tags': list,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- 'add_ie': [YoutubeIE.ie_key()],
- }, {
- 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
- 'info_dict': {
- 'id': 'yeWKywCrFtk',
- 'ext': 'mp4',
- 'title': 'Small Scale Baler and Braiding Rugs',
- 'uploader': 'Backus-Page House Museum',
- 'uploader_id': 'backuspagemuseum',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
- 'upload_date': '20161008',
- 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
- 'categories': ['Nonprofits & Activism'],
- 'tags': list,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- },
- }, {
- # https://github.com/ytdl-org/youtube-dl/issues/21844
- 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'info_dict': {
- 'title': 'Data Analysis with Dr Mike Pound',
- 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'uploader_id': 'Computerphile',
- 'uploader': 'Computerphile',
- },
- 'playlist_mincount': 11,
- }, {
- 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
- 'only_matching': True,
- }, {
- 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
- 'only_matching': True,
- }, {
- # music album playlist
- 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
- 'only_matching': True,
- }, {
- 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
- 'only_matching': True,
- }]
-
- def _real_initialize(self):
- self._login()
-
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
-
- for item in re.findall(
- r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
- attrs = extract_attributes(item)
- video_id = attrs['data-video-id']
- video_title = unescapeHTML(attrs.get('data-title'))
- if video_title:
- video_title = video_title.strip()
- ids_in_page.append(video_id)
- titles_in_page.append(video_title)
-
- # Fallback with old _VIDEO_RE
- self.extract_videos_from_page_impl(
- self._VIDEO_RE, page, ids_in_page, titles_in_page)
-
- # Relaxed fallbacks
- self.extract_videos_from_page_impl(
- r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
- self.extract_videos_from_page_impl(
- r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
-
- return zip(ids_in_page, titles_in_page)
-
- def _extract_mix(self, playlist_id):
- # The mixes are generated from a single video
- # the id of the playlist is just 'RD' + video_id
- ids = []
- last_id = playlist_id[-11:]
- for n in itertools.count(1):
- url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
- webpage = self._download_webpage(
- url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
- new_ids = orderedSet(re.findall(
- r'''(?xs)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
- webpage))
- # Fetch new pages until all the videos are repeated, it seems that
- # there are always 51 unique videos.
- new_ids = [_id for _id in new_ids if _id not in ids]
- if not new_ids:
- break
- ids.extend(new_ids)
- last_id = ids[-1]
-
- url_results = self._ids_to_results(ids)
-
- search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
- title_span = (
- search_title('playlist-title')
- or search_title('title long-title')
- or search_title('title'))
- title = clean_html(title_span)
-
- return self.playlist_result(url_results, playlist_id, title)
-
- def _extract_playlist(self, playlist_id):
- url = self._TEMPLATE_URL % playlist_id
- page = self._download_webpage(url, playlist_id)
-
- # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
- for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
- match = match.strip()
- # Check if the playlist exists or is private
- mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
- if mobj:
- reason = mobj.group('reason')
- message = 'This playlist %s' % reason
- if 'private' in reason:
- message += ', use --username or --netrc to access it'
- message += '.'
- raise ExtractorError(message, expected=True)
- elif re.match(r'[^<]*Invalid parameters[^<]*', match):
- raise ExtractorError(
- 'Invalid parameters. Maybe URL is incorrect.',
- expected=True)
- elif re.match(r'[^<]*Choose your language[^<]*', match):
- continue
- else:
- self.report_warning('Youtube gives an alert message: ' + match)
-
- playlist_title = self._html_search_regex(
- r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
- page, 'title', default=None)
-
- _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
- uploader = self._search_regex(
- r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
- page, 'uploader', default=None)
- mobj = re.search(
- r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
- page)
- if mobj:
- uploader_id = mobj.group('uploader_id')
- uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
- else:
- uploader_id = uploader_url = None
-
- has_videos = True
-
- if not playlist_title:
- try:
- # Some playlist URLs don't actually serve a playlist (e.g.
- # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
- next(self._entries(page, playlist_id))
- except StopIteration:
- has_videos = False
-
- playlist = self.playlist_result(
- self._entries(page, playlist_id), playlist_id, playlist_title)
- playlist.update({
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'uploader_url': uploader_url,
- })
-
- return has_videos, playlist
-
- def _check_download_just_video(self, url, playlist_id):
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- video_id = query_dict.get('v', [None])[0] or self._search_regex(
- r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
- 'video id', default=None)
- if video_id:
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- return video_id, None
- return None, None
-
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- video_id, video = self._check_download_just_video(url, playlist_id)
- if video:
- return video
-
- if playlist_id.startswith(('RD', 'UL', 'PU')):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
- has_videos, playlist = self._extract_playlist(playlist_id)
- if has_videos or not video_id:
- return playlist
-
- # Some playlist URLs don't actually serve a playlist (see
- # https://github.com/ytdl-org/youtube-dl/issues/10537).
- # Fallback to plain video extraction if there is a video id
- # along with playlist id.
- return self.url_result(video_id, 'Youtube', video_id=video_id)
-
-
-class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com channels'
- _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
- _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
- IE_NAME = 'youtube:channel'
- _TESTS = [{
- 'note': 'paginated channel',
- 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
- 'playlist_mincount': 91,
- 'info_dict': {
- 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
- 'title': 'Uploads from lex will',
- 'uploader': 'lex will',
- 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
- }
- }, {
- 'note': 'Age restricted channel',
- # from https://www.youtube.com/user/DeusExOfficial
- 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
- 'playlist_mincount': 64,
- 'info_dict': {
- 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
- 'title': 'Uploads from Deus Ex',
- 'uploader': 'Deus Ex',
- 'uploader_id': 'DeusExOfficial',
- },
- }, {
- 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
- else super(YoutubeChannelIE, cls).suitable(url))
-
- def _build_template_url(self, url, channel_id):
- return self._TEMPLATE_URL % channel_id
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- url = self._build_template_url(url, channel_id)
-
- # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
- # Workaround by extracting as a playlist if managed to obtain channel playlist URL
- # otherwise fallback on channel by page extraction
- channel_page = self._download_webpage(
- url + '?view=57', channel_id,
- 'Downloading channel page', fatal=False)
- if channel_page is False:
- channel_playlist_id = False
- else:
- channel_playlist_id = self._html_search_meta(
- 'channelId', channel_page, 'channel id', default=None)
- if not channel_playlist_id:
- channel_url = self._html_search_meta(
- ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
- channel_page, 'channel url', default=None)
- if channel_url:
- channel_playlist_id = self._search_regex(
- r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
- channel_url, 'channel id', default=None)
- if channel_playlist_id and channel_playlist_id.startswith('UC'):
- playlist_id = 'UU' + channel_playlist_id[2:]
- return self.url_result(
- compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
-
- channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
- autogenerated = re.search(r'''(?x)
- class="[^"]*?(?:
- channel-header-autogenerated-label|
- yt-channel-title-autogenerated
- )[^"]*"''', channel_page) is not None
-
- if autogenerated:
- # The videos are contained in a single page
- # the ajax pages can't be used, they are empty
- entries = [
- self.url_result(
- video_id, 'Youtube', video_id=video_id,
- video_title=video_title)
- for video_id, video_title in self.extract_videos_from_page(channel_page)]
- return self.playlist_result(entries, channel_id)
-
- try:
- next(self._entries(channel_page, channel_id))
- except StopIteration:
- alert_message = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
- channel_page, 'alert', default=None, group='alert')
- if alert_message:
- raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
-
- return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
-
-
-class YoutubeUserIE(YoutubeChannelIE):
- IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
- IE_NAME = 'youtube:user'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
- 'playlist_mincount': 320,
- 'info_dict': {
- 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
- 'title': 'Uploads from The Linux Foundation',
- 'uploader': 'The Linux Foundation',
- 'uploader_id': 'TheLinuxFoundation',
- }
- }, {
- # Only available via https://www.youtube.com/c/12minuteathlete/videos
- # but not https://www.youtube.com/user/12minuteathlete/videos
- 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
- 'playlist_mincount': 249,
- 'info_dict': {
- 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
- 'title': 'Uploads from 12 Minute Athlete',
- 'uploader': '12 Minute Athlete',
- 'uploader_id': 'the12minuteathlete',
- }
- }, {
- 'url': 'ytuser:phihag',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/c/gametrailers',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/gametrailers',
- 'only_matching': True,
- }, {
- # This channel is not available, geo restricted to JP
- 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- # Don't return True if the url can be extracted with other youtube
- # extractor, the regex would is too permissive and it would match.
- other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
- if any(ie.suitable(url) for ie in other_yt_ies):
- return False
- else:
- return super(YoutubeUserIE, cls).suitable(url)
-
- def _build_template_url(self, url, channel_id):
- mobj = re.match(self._VALID_URL, url)
- return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
-
-
-class YoutubeLiveIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
- IE_NAME = 'youtube:live'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
- 'info_dict': {
- 'id': 'a48o2S1cPoo',
- 'ext': 'mp4',
- 'title': 'The Young Turks - Live Main Show',
- 'uploader': 'The Young Turks',
- 'uploader_id': 'TheYoungTurks',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
- 'upload_date': '20150715',
- 'license': 'Standard YouTube License',
- 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
- 'categories': ['News & Politics'],
- 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/TheYoungTurks/live',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- base_url = mobj.group('base_url')
- webpage = self._download_webpage(url, channel_id, fatal=False)
- if webpage:
- page_type = self._og_search_property(
- 'type', webpage, 'page type', default='')
- video_id = self._html_search_meta(
- 'videoId', webpage, 'video id', default=None)
- if page_type.startswith('video') and video_id and re.match(
- r'^[0-9A-Za-z_-]{11}$', video_id):
- return self.url_result(video_id, YoutubeIE.ie_key())
- return self.url_result(base_url)
-
-
-class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
- IE_NAME = 'youtube:playlists'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
- 'playlist_mincount': 4,
- 'info_dict': {
- 'id': 'ThirstForScience',
- 'title': 'ThirstForScience',
- },
- }, {
- # with "Load more" button
- 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
- 'playlist_mincount': 70,
- 'info_dict': {
- 'id': 'igorkle1',
- 'title': 'Игорь Клейнер',
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
- 'playlist_mincount': 17,
- 'info_dict': {
- 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
- 'title': 'Chem Player',
- },
- 'skip': 'Blocked',
- }]
-
-
-class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com searches'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8.000.000 results
- _MAX_RESULTS = float('inf')
- IE_NAME = 'youtube:search'
- _SEARCH_KEY = 'ytsearch'
- _EXTRA_QUERY_ARGS = {}
- _TESTS = []
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
-
- videos = []
- limit = n
-
- url_query = {
- 'search_query': query.encode('utf-8'),
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
-
- for pagenum in itertools.count(1):
- data = self._download_json(
- result_url, video_id='query "%s"' % query,
- note='Downloading page %s' % pagenum,
- errnote='Unable to download API page',
- query={'spf': 'navigate'})
- html_content = data[1]['body']['content']
-
- if 'class="search-message' in html_content:
- raise ExtractorError(
- '[youtube] No video results', expected=True)
-
- new_videos = list(self._process_page(html_content))
- videos += new_videos
- if not new_videos or len(videos) > limit:
- break
- next_link = self._html_search_regex(
- r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
- html_content, 'next link', default=None)
- if next_link is None:
- break
- result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
-
- if len(videos) > n:
- videos = videos[:n]
- return self.playlist_result(videos, query)
-
-
-class YoutubeSearchDateIE(YoutubeSearchIE):
- IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
- _SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first'
- _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-
-
-class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com search URLs'
- IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
- _TESTS = [{
- 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'title': 'youtube-dl test video',
- }
- }, {
- 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._process_page(webpage), playlist_title=query)
-
-
-class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com (multi-season) shows'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
- IE_NAME = 'youtube:show'
- _TESTS = [{
- 'url': 'https://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'airdisasters',
- 'title': 'Air Disasters',
- }
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- return super(YoutubeShowIE, self)._real_extract(
- 'https://www.youtube.com/show/%s/playlists' % playlist_id)
-
-
-class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
- """
- Base class for feed extractors
- Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
- """
- _LOGIN_REQUIRED = True
-
- @property
- def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
-
- def _real_initialize(self):
- self._login()
-
- def _entries(self, page):
- # The extraction process is the same as for playlists, but the regex
- # for the video ids doesn't contain an index
- ids = []
- more_widget_html = content_html = page
- for page_num in itertools.count(1):
- matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-
- # 'recommended' feed has infinite 'load more' and each new portion spins
- # the same videos in (sometimes) slightly different order, so we'll check
- # for unicity and break when portion has no new videos
- new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
- if not new_ids:
- break
-
- ids.extend(new_ids)
-
- for entry in self._ids_to_results(new_ids):
- yield entry
-
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
-
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
- content_html = more['content_html']
- more_widget_html = more['load_more_widget_html']
-
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
- self._PLAYLIST_TITLE)
- return self.playlist_result(
- self._entries(page), playlist_title=self._PLAYLIST_TITLE)
-
-
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=WL',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- _, video = self._check_download_just_video(url, 'WL')
- if video:
- return video
- _, playlist = self._extract_playlist('WL')
- return playlist
-
-
-class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
- IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
- _LOGIN_REQUIRED = True
-
- def _real_extract(self, url):
- webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
- playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
- return self.url_result(playlist_id, 'YoutubePlaylist')
-
-
-class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
- _FEED_NAME = 'recommended'
- _PLAYLIST_TITLE = 'Youtube Recommended videos'
-
-
-class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
- _FEED_NAME = 'subscriptions'
- _PLAYLIST_TITLE = 'Youtube Subscriptions'
-
-
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
- _FEED_NAME = 'history'
- _PLAYLIST_TITLE = 'Youtube History'
-
-
-class YoutubeTruncatedURLIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_url'
- IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:https?://)?
- (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
- (?:watch\?(?:
- feature=[a-z_]+|
- annotation_id=annotation_[^&]+|
- x-yt-cl=[0-9]+|
- hl=[^&]*|
- t=[0-9]+
- )?
- |
- attribution_link\?a=[^&]+
- )
- $
- '''
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?feature=foo',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?hl=en-GB',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?t=2372',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- raise ExtractorError(
- 'Did you forget to quote the URL? Remember that & is a meta '
- 'character in most shells, so you want to put the URL in quotes, '
- 'like youtube-dl '
- '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
- ' or simply youtube-dl BaW_jenozKc .',
- expected=True)
-
-
-class YoutubeTruncatedIDIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_id'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- raise ExtractorError(
- 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
- expected=True)
diff --git a/youtube_dl/extractor/youtube_unmodified_reference.py b/youtube_dl/extractor/youtube_unmodified_reference.py
deleted file mode 100644
index f002d87..0000000
--- a/youtube_dl/extractor/youtube_unmodified_reference.py
+++ /dev/null
@@ -1,3325 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
-import itertools
-import json
-import os.path
-import random
-import re
-import time
-import traceback
-
-from .common import InfoExtractor, SearchInfoExtractor
-from ..jsinterp import JSInterpreter
-from ..swfinterp import SWFInterpreter
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_kwargs,
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
- compat_str,
-)
-from ..utils import (
- bool_or_none,
- clean_html,
- dict_get,
- error_to_compat_str,
- extract_attributes,
- ExtractorError,
- float_or_none,
- get_element_by_attribute,
- get_element_by_id,
- int_or_none,
- mimetype2ext,
- orderedSet,
- parse_codecs,
- parse_duration,
- remove_quotes,
- remove_start,
- smuggle_url,
- str_or_none,
- str_to_int,
- try_get,
- unescapeHTML,
- unified_strdate,
- unsmuggle_url,
- uppercase_escape,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class YoutubeBaseInfoExtractor(InfoExtractor):
- """Provide base functions for Youtube extractors"""
- _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
- _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
-
- _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
- _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
- _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
-
- _NETRC_MACHINE = 'youtube'
- # If True it will raise an error if no login info is provided
- _LOGIN_REQUIRED = False
-
- _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
-
- def _set_language(self):
- self._set_cookie(
- '.youtube.com', 'PREF', 'f1=50000000&hl=en',
- # YouTube sets the expire time to about two months
- expire_time=time.time() + 2 * 30 * 24 * 3600)
-
- def _ids_to_results(self, ids):
- return [
- self.url_result(vid_id, 'Youtube', video_id=vid_id)
- for vid_id in ids]
-
- def _login(self):
- """
- Attempt to log in to YouTube.
- True is returned if successful or skipped.
- False is returned if login failed.
-
- If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
- """
- username, password = self._get_login_info()
- # No authentication to be performed
- if username is None:
- if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
- return True
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None,
- note='Downloading login page',
- errnote='unable to fetch login page', fatal=False)
- if login_page is False:
- return
-
- login_form = self._hidden_inputs(login_page)
-
- def req(url, f_req, note, errnote):
- data = login_form.copy()
- data.update({
- 'pstMsg': 1,
- 'checkConnection': 'youtube',
- 'checkedDomains': 'youtube',
- 'hl': 'en',
- 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
- 'f.req': json.dumps(f_req),
- 'flowName': 'GlifWebSignIn',
- 'flowEntry': 'ServiceLogin',
- # TODO: reverse actual botguard identifier generation algo
- 'bgRequest': '["identifier",""]',
- })
- return self._download_json(
- url, None, note=note, errnote=errnote,
- transform_source=lambda s: re.sub(r'^[^[]*', '', s),
- fatal=False,
- data=urlencode_postdata(data), headers={
- 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
- 'Google-Accounts-XSRF': 1,
- })
-
- def warn(message):
- self._downloader.report_warning(message)
-
- lookup_req = [
- username,
- None, [], None, 'US', None, None, 2, False, True,
- [
- None, None,
- [2, 1, None, 1,
- 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
- None, [], 4],
- 1, [None, None, []], None, None, None, True
- ],
- username,
- ]
-
- lookup_results = req(
- self._LOOKUP_URL, lookup_req,
- 'Looking up account info', 'Unable to look up account info')
-
- if lookup_results is False:
- return False
-
- user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
- if not user_hash:
- warn('Unable to extract user hash')
- return False
-
- challenge_req = [
- user_hash,
- None, 1, None, [1, None, None, None, [password, None, True]],
- [
- None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
- 1, [None, None, []], None, None, None, True
- ]]
-
- challenge_results = req(
- self._CHALLENGE_URL, challenge_req,
- 'Logging in', 'Unable to log in')
-
- if challenge_results is False:
- return
-
- login_res = try_get(challenge_results, lambda x: x[0][5], list)
- if login_res:
- login_msg = try_get(login_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to login: %s' % 'Invalid password'
- if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
- return False
-
- res = try_get(challenge_results, lambda x: x[0][-1], list)
- if not res:
- warn('Unable to extract result entry')
- return False
-
- login_challenge = try_get(res, lambda x: x[0][0], list)
- if login_challenge:
- challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
- if challenge_str == 'TWO_STEP_VERIFICATION':
- # SEND_SUCCESS - TFA code has been successfully sent to phone
- # QUOTA_EXCEEDED - reached the limit of TFA codes
- status = try_get(login_challenge, lambda x: x[5], compat_str)
- if status == 'QUOTA_EXCEEDED':
- warn('Exceeded the limit of TFA codes, try later')
- return False
-
- tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
- if not tl:
- warn('Unable to extract TL')
- return False
-
- tfa_code = self._get_tfa_info('2-step verification code')
-
- if not tfa_code:
- warn(
- 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
- '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
- return False
-
- tfa_code = remove_start(tfa_code, 'G-')
-
- tfa_req = [
- user_hash, None, 2, None,
- [
- 9, None, None, None, None, None, None, None,
- [None, tfa_code, True, 2]
- ]]
-
- tfa_results = req(
- self._TFA_URL.format(tl), tfa_req,
- 'Submitting TFA code', 'Unable to submit TFA code')
-
- if tfa_results is False:
- return False
-
- tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
- if tfa_res:
- tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to finish TFA: %s' % 'Invalid TFA code'
- if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
- return False
-
- check_cookie_url = try_get(
- tfa_results, lambda x: x[0][-1][2], compat_str)
- else:
- CHALLENGES = {
- 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
- 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
- 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
- }
- challenge = CHALLENGES.get(
- challenge_str,
- '%s returned error %s.' % (self.IE_NAME, challenge_str))
- warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
- return False
- else:
- check_cookie_url = try_get(res, lambda x: x[2], compat_str)
-
- if not check_cookie_url:
- warn('Unable to extract CheckCookie URL')
- return False
-
- check_cookie_results = self._download_webpage(
- check_cookie_url, None, 'Checking cookie', fatal=False)
-
- if check_cookie_results is False:
- return False
-
- if 'https://myaccount.google.com/' not in check_cookie_results:
- warn('Unable to log in')
- return False
-
- return True
-
def _download_webpage_handle(self, *args, **kwargs):
    """Delegate to the parent implementation with ``disable_polymer=true`` added to the query.

    Takes the same positional and keyword arguments as the inherited
    ``_download_webpage_handle`` and returns whatever that call returns.
    """
    # Work on a copy so the mapping supplied by the caller is not mutated.
    patched_query = kwargs.get('query', {}).copy()
    patched_query['disable_polymer'] = 'true'
    kwargs['query'] = patched_query
    parent = super(YoutubeBaseInfoExtractor, self)
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
-
def _real_initialize(self):
    """Set the language and attempt to log in, unless no downloader is attached."""
    if self._downloader is not None:
        self._set_language()
        # The outcome of the login attempt is not acted upon here.
        self._login()
-
-
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for extractors that page through a listing via its "Load more" button.

    ``_entries`` calls ``self._process_page``, which this class does not
    define itself.
    """

    def _entries(self, page, playlist_id):
        """Yield entries from ``page`` and from every follow-up "Load more" page.

        ``page`` is the HTML of the first page; ``playlist_id`` is passed to
        ``_download_json`` when fetching the continuation pages.
        """
        max_retries = 3
        widget_html = chunk_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(chunk_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                # No continuation link: this was the last page
                break

            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                note = 'Downloading page #%s%s' % (
                    page_num, ' (retry #%d)' % attempt if attempt else '')
                try:
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'),
                        playlist_id, note, transform_source=uppercase_escape)
                except ExtractorError as e:
                    transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    # Either not a retryable error or the retries are used up
                    raise
                break

            chunk_html = more['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
-
-
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages contain links to individual videos.

    ``extract_videos_from_page`` reads ``self._VIDEO_RE``, which is not
    defined in this class.
    """

    def _process_page(self, content):
        """Yield one ``url_result`` per (id, title) pair found in ``content``."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by ``video_re`` in ``page``.

        ``ids_in_page`` and ``titles_in_page`` are parallel lists that are
        extended in place. An id that is already present is not added again,
        but its title is filled in if it was empty so far.
        """
        for match in re.finditer(video_re, page):
            named_groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist
            # (may no longer be the case).
            # NOTE(review): this compares the 'id' group, not 'index', with
            # '0' - looks unintended; confirm before changing.
            if 'index' in named_groups and match.group('id') == '0':
                continue

            video_id = match.group('id')
            title = None
            if 'title' in named_groups:
                title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if title == '► Play all':
                title = None

            try:
                pos = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this id
                ids_in_page.append(video_id)
                titles_in_page.append(title)
            else:
                if title and not titles_in_page[pos]:
                    titles_in_page[pos] = title

    def extract_videos_from_page(self, page):
        """Return the zipped (id, title) pairs matched by ``self._VIDEO_RE`` in ``page``."""
        video_ids, video_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, video_ids, video_titles)
        return zip(video_ids, video_titles)
-
-
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages contain links to playlists."""

    def _process_page(self, content):
        """Yield a ``YoutubePlaylist`` url_result for each distinct playlist id in ``content``."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing title is tolerated (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
-
-
-class YoutubeIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com'
- _VALID_URL = r"""(?x)^
- (
- (?:https?://|//) # http(s):// or protocol-independent URL
- (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
- (?:www\.)?deturl\.com/www\.youtube\.com/|
- (?:www\.)?pwnyoutube\.com/|
- (?:www\.)?hooktube\.com/|
- (?:www\.)?yourepeat\.com/|
- tube\.majestyc\.net/|
- # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
- (?:(?:www|dev)\.)?invidio\.us/|
- (?:(?:www|no)\.)?invidiou\.sh/|
- (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
- (?:www\.)?invidious\.kabi\.tk/|
- (?:www\.)?invidious\.enkirton\.net/|
- (?:www\.)?invidious\.13ad\.de/|
- (?:www\.)?invidious\.mastodon\.host/|
- (?:www\.)?invidious\.nixnet\.xyz/|
- (?:www\.)?tube\.poal\.co/|
- (?:www\.)?vid\.wxzm\.sx/|
- (?:www\.)?yt\.elukerio\.org/|
- (?:www\.)?kgg2m7yk5aybusll\.onion/|
- (?:www\.)?qklhadlycap4cnod\.onion/|
- (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
- (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
- (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
- (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
- youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
- (?:.*?\#/)? # handle anchor (#/) redirect urls
- (?: # the various things that can precede the ID:
- (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
- |(?: # or the v= param in all its forms
- (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
- (?:\?|\#!?) # the params delimiter ? or # or #!
- (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
- v=
- )
- ))
- |(?:
- youtu\.be| # just youtu.be/xxxx
- vid\.plus| # or vid.plus/xxxx
- zwearz\.com/watch| # or zwearz.com/watch/xxxx
- )/
- |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
- )
- )? # all until now is optional -> you can pass the naked ID
- ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
- (?!.*?\blist=
- (?:
- %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
- WL # WL are handled by the watch later IE
- )
- )
- (?(1).+)? # if we found the ID, everything can follow
- $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
- _formats = {
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-
- # av01 video only formats sometimes served with "unknown" codecs
- '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- }
- _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
-
- _GEO_BYPASS = False
-
- IE_NAME = 'youtube'
- _TESTS = [
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
- 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'start_time': 1,
- 'end_time': 9,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
- 'note': 'Test generic use_cipher_signature video (#897)',
- 'info_dict': {
- 'id': 'UxxajLWwzqY',
- 'ext': 'mp4',
- 'upload_date': '20120506',
- 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
- 'alt_title': 'I Love It (feat. Charli XCX)',
- 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
- 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
- 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
- 'iconic ep', 'iconic', 'love', 'it'],
- 'duration': 180,
- 'uploader': 'Icona Pop',
- 'uploader_id': 'IconaPop',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
- 'creator': 'Icona Pop',
- 'track': 'I Love It (feat. Charli XCX)',
- 'artist': 'Icona Pop',
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
- 'note': 'Test VEVO video with age protection (#956)',
- 'info_dict': {
- 'id': '07FYdnEawAQ',
- 'ext': 'mp4',
- 'upload_date': '20130703',
- 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
- 'alt_title': 'Tunnel Vision',
- 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
- 'duration': 419,
- 'uploader': 'justintimberlakeVEVO',
- 'uploader_id': 'justintimberlakeVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
- 'creator': 'Justin Timberlake',
- 'track': 'Tunnel Vision',
- 'artist': 'Justin Timberlake',
- 'age_limit': 18,
- }
- },
- {
- 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
- 'note': 'Embed-only video (#1746)',
- 'info_dict': {
- 'id': 'yZIXLfi8CZQ',
- 'ext': 'mp4',
- 'upload_date': '20120608',
- 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
- 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
- 'uploader': 'SET India',
- 'uploader_id': 'setindia',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
- 'age_limit': 18,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
- 'note': 'Use the first video ID in the URL',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
- 'note': '256k DASH audio (format 141) via DASH manifest',
- 'info_dict': {
- 'id': 'a9LDPn-MO4I',
- 'ext': 'm4a',
- 'upload_date': '20121002',
- 'uploader_id': '8KVIDEO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
- 'description': '',
- 'uploader': '8KVIDEO',
- 'title': 'UHDTV TEST 8K VIDEO.mp4'
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141',
- },
- 'skip': 'format 141 not served anymore',
- },
- # DASH manifest with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
- 'info_dict': {
- 'id': 'IB3lcPjvWLA',
- 'ext': 'm4a',
- 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
- 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
- 'duration': 244,
- 'uploader': 'AfrojackVEVO',
- 'uploader_id': 'AfrojackVEVO',
- 'upload_date': '20131011',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # JS player signature function name containing $
- {
- 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
- 'info_dict': {
- 'id': 'nfWlot6h_JM',
- 'ext': 'm4a',
- 'title': 'Taylor Swift - Shake It Off',
- 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
- 'duration': 242,
- 'uploader': 'TaylorSwiftVEVO',
- 'uploader_id': 'TaylorSwiftVEVO',
- 'upload_date': '20140818',
- 'creator': 'Taylor Swift',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # Controversy video
- {
- 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
- 'info_dict': {
- 'id': 'T4XJQO3qol8',
- 'ext': 'mp4',
- 'duration': 219,
- 'upload_date': '20100909',
- 'uploader': 'Amazing Atheist',
- 'uploader_id': 'TheAmazingAtheist',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
- 'title': 'Burning Everyone\'s Koran',
- 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
- }
- },
- # Normal age-gate video (No vevo, embed allowed)
- {
- 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
- 'info_dict': {
- 'id': 'HtVdAasjOgU',
- 'ext': 'mp4',
- 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
- 'duration': 142,
- 'uploader': 'The Witcher',
- 'uploader_id': 'WitcherGame',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
- 'upload_date': '20140605',
- 'age_limit': 18,
- },
- },
- # Age-gate video with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
- 'info_dict': {
- 'id': '6kLq3WMV1nU',
- 'ext': 'mp4',
- 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
- 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
- 'duration': 246,
- 'uploader': 'LloydVEVO',
- 'uploader_id': 'LloydVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
- 'upload_date': '20110629',
- 'age_limit': 18,
- },
- },
- # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
- # YouTube Red ad is not captured for creator
- {
- 'url': '__2ABJjxzNo',
- 'info_dict': {
- 'id': '__2ABJjxzNo',
- 'ext': 'mp4',
- 'duration': 266,
- 'upload_date': '20100430',
- 'uploader_id': 'deadmau5',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
- 'creator': 'deadmau5',
- 'description': 'md5:12c56784b8032162bb936a5f76d55360',
- 'uploader': 'deadmau5',
- 'title': 'Deadmau5 - Some Chords (HD)',
- 'alt_title': 'Some Chords',
- },
- 'expected_warnings': [
- 'DASH manifest missing',
- ]
- },
- # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
- {
- 'url': 'lqQg6PlCWgI',
- 'info_dict': {
- 'id': 'lqQg6PlCWgI',
- 'ext': 'mp4',
- 'duration': 6085,
- 'upload_date': '20150827',
- 'uploader_id': 'olympic',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
- 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- }
- },
- # Non-square pixels
- {
- 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
- 'info_dict': {
- 'id': '_b-2C3KPAM0',
- 'ext': 'mp4',
- 'stretched_ratio': 16 / 9.,
- 'duration': 85,
- 'upload_date': '20110310',
- 'uploader_id': 'AllenMeow',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
- 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
- 'uploader': '孫ᄋᄅ',
- 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
- },
- },
- # url_encoded_fmt_stream_map is empty string
- {
- 'url': 'qEJwOuvDf7I',
- 'info_dict': {
- 'id': 'qEJwOuvDf7I',
- 'ext': 'webm',
- 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
- 'description': '',
- 'upload_date': '20150404',
- 'uploader_id': 'spbelect',
- 'uploader': 'Наблюдатели Петербурга',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- },
- 'skip': 'This live event has ended.',
- },
- # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
- {
- 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
- 'info_dict': {
- 'id': 'FIl7x6_3R5Y',
- 'ext': 'webm',
- 'title': 'md5:7b81415841e02ecd4313668cde88737a',
- 'description': 'md5:116377fd2963b81ec4ce64b542173306',
- 'duration': 220,
- 'upload_date': '20150625',
- 'uploader_id': 'dorappi2000',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
- 'uploader': 'dorappi2000',
- 'formats': 'mincount:31',
- },
- 'skip': 'not actual anymore',
- },
- # DASH manifest with segment_list
- {
- 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
- 'md5': '8ce563a1d667b599d21064e982ab9e31',
- 'info_dict': {
- 'id': 'CsmdDsKjzN8',
- 'ext': 'mp4',
- 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
- 'uploader': 'Airtek',
- 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
- 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
- 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '135', # bestvideo
- },
- 'skip': 'This live event has ended.',
- },
- {
- # Multifeed videos (multiple cameras), URL is for Main Camera
- 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'title': 'teamPGP: Rocket League Noob Stream',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7335,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': '6h8e8xoXJzg',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'PUOgX5z9xZw',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'teuwxikvS5k',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (zim)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7334,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }],
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
- 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
- 'info_dict': {
- 'id': 'gVfLd0zydlo',
- 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
- },
- 'playlist_count': 2,
- 'skip': 'Not multifeed anymore',
- },
- {
- 'url': 'https://vid.plus/FlRa-iH7PGw',
- 'only_matching': True,
- },
- {
- 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
- 'only_matching': True,
- },
- {
- # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- # Also tests cut-off URL expansion in video description (see
- # https://github.com/ytdl-org/youtube-dl/issues/1892,
- # https://github.com/ytdl-org/youtube-dl/issues/8164)
- 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
- 'info_dict': {
- 'id': 'lsguqyKfVQg',
- 'ext': 'mp4',
- 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
- 'alt_title': 'Dark Walk - Position Music',
- 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
- 'duration': 133,
- 'upload_date': '20151119',
- 'uploader_id': 'IronSoulElf',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
- 'uploader': 'IronSoulElf',
- 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'track': 'Dark Walk - Position Music',
- 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
- 'only_matching': True,
- },
- {
- # Video with yt:stretch=17:0
- 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
- 'info_dict': {
- 'id': 'Q39EVAstoRM',
- 'ext': 'mp4',
- 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
- 'description': 'md5:ee18a25c350637c8faff806845bddee9',
- 'upload_date': '20151107',
- 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
- 'uploader': 'CH GAMER DROID',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video does not exist.',
- },
- {
- # Video licensed under Creative Commons
- 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
- 'info_dict': {
- 'id': 'M4gD1WSo5mA',
- 'ext': 'mp4',
- 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
- 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
- 'duration': 721,
- 'upload_date': '20150127',
- 'uploader_id': 'BerkmanCenter',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
- 'uploader': 'The Berkman Klein Center for Internet & Society',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Channel-like uploader_url
- 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
- 'info_dict': {
- 'id': 'eQcmzGIKrzg',
- 'ext': 'mp4',
- 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
- 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
- 'duration': 4060,
- 'upload_date': '20151119',
- 'uploader': 'Bernie Sanders',
- 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
- 'only_matching': True,
- },
- {
- # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
- 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
- 'only_matching': True,
- },
- {
- # Rental video preview
- 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
- 'info_dict': {
- 'id': 'uGpuVWrhIzE',
- 'ext': 'mp4',
- 'title': 'Piku - Trailer',
- 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
- 'upload_date': '20150811',
- 'uploader': 'FlixMatrix',
- 'uploader_id': 'FlixMatrixKaravan',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
- 'license': 'Standard YouTube License',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # YouTube Red video with episode data
- 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
- 'info_dict': {
- 'id': 'iqKdEhx-dD4',
- 'ext': 'mp4',
- 'title': 'Isolation - Mind Field (Ep 1)',
- 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
- 'duration': 2085,
- 'upload_date': '20170118',
- 'uploader': 'Vsauce',
- 'uploader_id': 'Vsauce',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
- 'series': 'Mind Field',
- 'season_number': 1,
- 'episode_number': 1,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': [
- 'Skipping DASH manifest',
- ],
- },
- {
- # The following content has been identified by the YouTube community
- # as inappropriate or offensive to some audiences.
- 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
- 'info_dict': {
- 'id': '6SJNVb0GnPI',
- 'ext': 'mp4',
- 'title': 'Race Differences in Intelligence',
- 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
- 'duration': 965,
- 'upload_date': '20140124',
- 'uploader': 'New Century Foundation',
- 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # itag 212
- 'url': '1t24XAntNCY',
- 'only_matching': True,
- },
- {
- # geo restricted to JP
- 'url': 'sJL6WA-aGkQ',
- 'only_matching': True,
- },
- {
- 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
- 'only_matching': True,
- },
- {
- 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
- 'only_matching': True,
- },
- {
- # DRM protected
- 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
- 'only_matching': True,
- },
- {
- # Video with unsupported adaptive stream type formats
- 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
- 'info_dict': {
- 'id': 'Z4Vy8R84T1U',
- 'ext': 'mp4',
- 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'duration': 433,
- 'upload_date': '20130923',
- 'uploader': 'Amelia Putri Harwita',
- 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
- 'formats': 'maxcount:10',
- },
- 'params': {
- 'skip_download': True,
- 'youtube_include_dash_manifest': False,
- },
- },
- {
- # Youtube Music Auto-generated description
- 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
- 'info_dict': {
- 'id': 'MgNrAu2pzNs',
- 'ext': 'mp4',
- 'title': 'Voyeur Girl',
- 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
- 'upload_date': '20190312',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
- 'artist': 'Stephen',
- 'track': 'Voyeur Girl',
- 'album': 'it\'s too much love to know my dear',
- 'release_date': '20190313',
- 'release_year': 2019,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # Retrieve 'artist' field from 'Artist:' in video description
- # when it is present on youtube music video
- 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
- 'info_dict': {
- 'id': 'k0jLE7tTwjY',
- 'ext': 'mp4',
- 'title': 'Latch Feat. Sam Smith',
- 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
- 'upload_date': '20150110',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
- 'artist': 'Disclosure',
- 'track': 'Latch Feat. Sam Smith',
- 'album': 'Latch Featuring Sam Smith',
- 'release_date': '20121008',
- 'release_year': 2012,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle multiple artists on youtube music video
- 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
- 'info_dict': {
- 'id': '74qn0eJSjpA',
- 'ext': 'mp4',
- 'title': 'Eastside',
- 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
- 'upload_date': '20180710',
- 'uploader': 'Benny Blanco - Topic',
- 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
- 'artist': 'benny blanco, Halsey, Khalid',
- 'track': 'Eastside',
- 'album': 'Eastside',
- 'release_date': '20180713',
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle youtube music video with release_year and no release_date
- 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
- 'info_dict': {
- 'id': '-hcAI0g-f5M',
- 'ext': 'mp4',
- 'title': 'Put It On Me',
- 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
- 'upload_date': '20180426',
- 'uploader': 'Matt Maeson - Topic',
- 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
- 'artist': 'Matt Maeson',
- 'track': 'Put It On Me',
- 'album': 'The Hearse',
- 'release_date': None,
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- ]
-
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create an empty signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature; keyed by
    # (player_url, signature cache id) and holding deciphering callables.
    self._player_cache = {}
-
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage of *video_id* is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
-
def report_information_extraction(self, video_id):
    """Tell the user that video information for *video_id* is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
-
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for *video_id*.

    The parameter name ``format`` shadows the builtin; it is kept unchanged
    because it is part of the method's signature.
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
-
def report_rtmp_download(self):
    """Tell the user that the download will go through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
-
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    For instance 'abc.de' yields '3.2'. The result is used as part of
    cache keys for signature functions.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
-
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable deciphering signatures shaped like *example_sig*.

    The player type (file extension) and player id are parsed out of
    *player_url*. If the 'youtube-sigfuncs' cache already holds an index
    permutation for this player and signature shape, a function applying that
    permutation is returned. Otherwise the player ('js' or 'swf') is
    downloaded and parsed, the resulting function is probed with a test
    string to derive the permutation, and the permutation is stored in the
    cache.

    Raises ExtractorError when the player URL cannot be identified, when the
    derived cache id is not a plain file name, or when the player type is
    neither 'js' nor 'swf'. These used to be ``assert`` statements, which are
    stripped when Python runs with -O.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id is handed to the cache as a key, so it must not contain any
    # path components.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature function id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        raise ExtractorError('Invalid player type %r' % player_type)

    # Feed a string whose i-th character has code point i through the
    # function; the code points of the output are the index permutation.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
-
def _print_sig_code(self, func, example_sig):
    """Print Python code that reproduces *func* for signatures shaped like *example_sig*.

    *func* is probed with a test string whose i-th character has code point
    i, so the output reveals which input index ends up at each position.
    Runs of consecutive indices (step +1 or -1) are written as slices,
    everything else as single subscripts.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            stop = end + step
            start_txt = '' if start == 0 else str(start)
            stop_txt = (':%d' % stop) if stop >= 0 else ':'
            step_txt = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (start_txt, stop_txt, step_txt)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                if delta == step:
                    # Still inside the current run.
                    continue
                # The run ended at prev; emit it and start afresh.
                yield _genslice(start, prev, step)
                step = None
                continue
            if delta in [-1, 1]:
                step = delta
                start = prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
-
def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from the JS player source.

    The name of the initial signature function is located with a list of
    regular expressions (current patterns first, obsolete ones after), then
    the function is extracted with JSInterpreter. The returned callable
    takes the signature string and passes it as the single argument.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)
    return lambda s: decipher([s])
-
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from the SWF player contents.

    Extracts the 'decipher' function of the 'SignatureDecipher' class with
    SWFInterpreter; the returned callable passes the signature string as the
    single argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')
    return lambda s: decipher([s])
-
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    *player_url* is made absolute first. The deciphering function is looked
    up in (or added to) self._player_cache under the key
    (player_url, signature cache id) and then applied to *s*. Any failure is
    re-raised as ExtractorError with the formatted traceback in the message.
    *age_gate* is accepted but not used here.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Protocol-relative URLs get a scheme; anything else that is not already
    # http(s) is resolved against the YouTube site root.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
-
def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to a list of subtitle formats.

    The track list is downloaded from the timedtext endpoint. For every
    language (first track wins) one entry per extension in
    self._SUBTITLE_FORMATS is built. An empty dict is returned, with a
    warning, when the list cannot be downloaded or holds no tracks.
    *webpage* is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            continue
        formats = []
        for ext in self._SUBTITLE_FORMATS:
            query = compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            })
            formats.append({
                'url': 'https://www.youtube.com/api/timedtext?' + query,
                'ext': ext,
            })
        subtitles[lang] = formats

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
-
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ``ytplayer.config`` JSON object from *webpage*.

    Returns the parsed JSON (parsing is non-fatal), or None when no config
    is found in the page.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. For now the more concrete regex is
    # tried first; proper quoted string handling should replace this
    # workaround in the future (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
-
def _get_automatic_captions(self, video_id, webpage):
    """Return automatic captions as a dict of language code -> format list.

    The webpage is passed in because the captions URL is taken from its
    player config, which saves downloading it again. Three sources are
    tried in order: the 'ttsurl' argument, the 'player_response' JSON, and
    the legacy 'caption_tracks' arguments. An empty dict is returned (with a
    warning) when nothing usable is found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_query = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            caption_list = self._download_xml(
                caption_url + '&' + list_query, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            translated = {}
            for target_node in caption_list.findall('target'):
                target_lang = target_node.attrib['lang_code']
                target_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    query = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    target_formats.append({
                        'url': caption_url + '&' + query,
                        'ext': ext,
                    })
                translated[target_lang] = target_formats
            return translated

        def make_captions(sub_url, sub_langs):
            # Rewrite the query of sub_url once per (language, format) pair.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                lang_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    new_query = compat_urllib_parse_urlencode(caption_qs, True)
                    lang_formats.append({
                        'url': compat_urlparse.urlunparse(
                            parsed_sub_url._replace(query=new_query)),
                        'ext': ext,
                    })
                captions[sub_lang] = lang_formats
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                lang_codes = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        lang_codes.append(lang_code)
                return make_captions(base_url, lang_codes)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        legacy_langs = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            legacy_lang = lang_qs.get('lc', [None])[0]
            if legacy_lang:
                legacy_langs.append(legacy_lang)
        return make_captions(caption_url, legacy_langs)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
-
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL of the video ('Marking watched').

    The URL is taken from *player_response* (playbackTracking) or, failing
    that, from *video_info*. Nothing is done when neither provides a valid
    URL. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
        video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick 16 characters uniformly. The previous randint(0, 256) & 63 drew
    # from 257 values (randint includes both bounds), which slightly
    # over-represented the first character of the alphabet.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
-
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in *webpage*.

    Three kinds of embeds are collected, in this order: player URLs from
    iframe/embed/object/SWFObject style markup (HTML-unescaped), ids from
    lazyYT ``data-youtube-id`` attributes (HTML-unescaped), and ids from the
    WordPress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which demands
    # a literal ':' and therefore never matched an `embedSWF("...")` call;
    # it now matches the opening parenthesis followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(list(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one.
    entries.extend(m[-1] for m in matches)

    return entries
-
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in *webpage*, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
-
@classmethod
def extract_id(cls, url):
    """Return the video id contained in *url*.

    The URL is matched against cls._VALID_URL in verbose mode; the id is the
    second capture group. Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
-
- @staticmethod
- def _extract_chapters(description, duration):
- if not description:
- return None
- chapter_lines = re.findall(
- r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
- description)
- if not chapter_lines:
- return None
- chapters = []
- for next_num, (chapter_line, time_point) in enumerate(
- chapter_lines, start=1):
- start_time = parse_duration(time_point)
- if start_time is None:
- continue
- if start_time > duration:
- break
- end_time = (duration if next_num == len(chapter_lines)
- else parse_duration(chapter_lines[next_num][1]))
- if end_time is None:
- continue
- if end_time > duration:
- end_time = duration
- if start_time > end_time:
- break
- chapter_title = re.sub(
- r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
- chapter_title = re.sub(r'\s+', ' ', chapter_title)
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': chapter_title,
- })
- return chapters
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- proto = (
- 'http' if self._downloader.params.get('prefer_insecure', False)
- else 'https')
-
- start_time = None
- end_time = None
- parsed_url = compat_urllib_parse_urlparse(url)
- for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
- if start_time is None and 't' in query:
- start_time = parse_duration(query['t'][0])
- if start_time is None and 'start' in query:
- start_time = parse_duration(query['start'][0])
- if end_time is None and 'end' in query:
- end_time = parse_duration(query['end'][0])
-
- # Extract original video URL from URL with redirection, like age verification, using next_url parameter
- mobj = re.search(self._NEXT_URL_RE, url)
- if mobj:
- url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
- video_id = self.extract_id(url)
-
- # Get video webpage
- url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- video_webpage = self._download_webpage(url, video_id)
-
- # Attempt to extract SWF player URL
- mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
- if mobj is not None:
- player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
- else:
- player_url = None
-
- dash_mpds = []
-
- def add_dash_mpd(video_info):
- dash_mpd = video_info.get('dashmpd')
- if dash_mpd and dash_mpd[0] not in dash_mpds:
- dash_mpds.append(dash_mpd[0])
-
- def add_dash_mpd_pr(pl_response):
- dash_mpd = url_or_none(try_get(
- pl_response, lambda x: x['streamingData']['dashManifestUrl'],
- compat_str))
- if dash_mpd and dash_mpd not in dash_mpds:
- dash_mpds.append(dash_mpd)
-
- is_live = None
- view_count = None
-
- def extract_view_count(v_info):
- return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
-
- def extract_token(v_info):
- return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
- def extract_player_response(player_response, video_id):
- pl_response = str_or_none(player_response)
- if not pl_response:
- return
- pl_response = self._parse_json(pl_response, video_id, fatal=False)
- if isinstance(pl_response, dict):
- add_dash_mpd_pr(pl_response)
- return pl_response
-
- player_response = {}
-
- # Get video info
- embed_webpage = None
- if re.search(r'player-age-gate-content">', video_webpage) is not None:
- age_gate = True
- # We simulate the access to the video from www.youtube.com/v/{video_id}
- # this can be viewed without login into Youtube
- url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
- data = compat_urllib_parse_urlencode({
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts': self._search_regex(
- r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
- })
- video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
- video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- pl_response = video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(video_info)
- view_count = extract_view_count(video_info)
- else:
- age_gate = False
- video_info = None
- sts = None
- # Try looking directly into the video webpage
- ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
- if ytplayer_config:
- args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
- # Convert to the same format returned by compat_parse_qs
- video_info = dict((k, [v]) for k, v in args.items())
- add_dash_mpd(video_info)
- # Rental video is not rented but preview is available (e.g.
- # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/ytdl-org/youtube-dl/issues/10532)
- if not video_info and args.get('ypc_vid'):
- return self.url_result(
- args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
- if args.get('livestream') == '1' or args.get('live_playback') == 1:
- is_live = True
- sts = ytplayer_config.get('sts')
- if not player_response:
- player_response = extract_player_response(args.get('player_response'), video_id)
- if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
- add_dash_mpd_pr(player_response)
- # We also try looking in get_video_info since it may contain different dashmpd
- # URL that points to a DASH manifest with possibly different itag set (some itags
- # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
- # manifest pointed by get_video_info's dashmpd).
- # The general idea is to take a union of itags of both DASH manifests (for example
- # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
- self.report_video_info_webpage_download(video_id)
- for el in ('embedded', 'detailpage', 'vevo', ''):
- query = {
- 'video_id': video_id,
- 'ps': 'default',
- 'eurl': '',
- 'gl': 'US',
- 'hl': 'en',
- }
- if el:
- query['el'] = el
- if sts:
- query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
- if not video_info_webpage:
- continue
- get_video_info = compat_parse_qs(video_info_webpage)
- if not player_response:
- pl_response = get_video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(get_video_info)
- if view_count is None:
- view_count = extract_view_count(get_video_info)
- if not video_info:
- video_info = get_video_info
- get_token = extract_token(get_video_info)
- if get_token:
- # Different get_video_info requests may report different results, e.g.
- # some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
- # the original webpage as well as el=info and el=embedded get_video_info
- # requests report video unavailability due to geo restriction while
- # el=detailpage succeeds and returns valid data). This is probably
- # due to YouTube measures against IP ranges of hosting providers.
- # Working around by preferring the first succeeded video_info containing
- # the token if no such video_info yet was found.
- token = extract_token(video_info)
- if not token:
- video_info = get_video_info
- break
-
- def extract_unavailable_message():
- messages = []
- for tag, kind in (('h1', 'message'), ('div', 'submessage')):
- msg = self._html_search_regex(
- r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
- video_webpage, 'unavailable %s' % kind, default=None)
- if msg:
- messages.append(msg)
- if messages:
- return '\n'.join(messages)
-
- if not video_info:
- unavailable_message = extract_unavailable_message()
- if not unavailable_message:
- unavailable_message = 'Unable to extract video data'
- raise ExtractorError(
- 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
-
- video_details = try_get(
- player_response, lambda x: x['videoDetails'], dict) or {}
-
- video_title = video_info.get('title', [None])[0] or video_details.get('title')
- if not video_title:
- self._downloader.report_warning('Unable to extract video title')
- video_title = '_'
-
- description_original = video_description = get_element_by_id("eow-description", video_webpage)
- if video_description:
-
- def replace_url(m):
- redir_url = compat_urlparse.urljoin(url, m.group(1))
- parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
- if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
- qs = compat_parse_qs(parsed_redir_url.query)
- q = qs.get('q')
- if q and q[0]:
- return q[0]
- return redir_url
-
- description_original = video_description = re.sub(r'''(?x)
- <a\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- (?:title|href)="([^"]+)"\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- class="[^"]*"[^>]*>
- [^<]+\.{3}\s*
- </a>
- ''', replace_url, video_description)
- video_description = clean_html(video_description)
- else:
- video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
-
- if not smuggled_data.get('force_singlefeed', False):
- if not self._downloader.params.get('noplaylist'):
- multifeed_metadata_list = try_get(
- player_response,
- lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
- compat_str) or try_get(
- video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
- if multifeed_metadata_list:
- entries = []
- feed_ids = []
- for feed in multifeed_metadata_list.split(','):
- # Unquote should take place before split on comma (,) since textual
- # fields may contain comma as well (see
- # https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
- {'force_singlefeed': True}),
- 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
- })
- feed_ids.append(feed_data['id'][0])
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(entries, video_id, video_title, video_description)
- else:
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-
- if view_count is None:
- view_count = extract_view_count(video_info)
- if view_count is None and video_details:
- view_count = int_or_none(video_details.get('viewCount'))
-
- if is_live is None:
- is_live = bool_or_none(video_details.get('isLive'))
-
- # Check for "rental" videos
- if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
-
- def _extract_filesize(media_url):
- return int_or_none(self._search_regex(
- r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
-
- streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
- streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
-
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
- if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
- formats = []
- formats_spec = {}
- fmt_list = video_info.get('fmt_list', [''])[0]
- if fmt_list:
- for fmt in fmt_list.split(','):
- spec = fmt.split('/')
- if len(spec) > 1:
- width_height = spec[1].split('x')
- if len(width_height) == 2:
- formats_spec[spec[0]] = {
- 'resolution': spec[1],
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- }
- for fmt in streaming_formats:
- itag = str_or_none(fmt.get('itag'))
- if not itag:
- continue
- quality = fmt.get('quality')
- quality_label = fmt.get('qualityLabel') or quality
- formats_spec[itag] = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_note': quality_label,
- 'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
- # bitrate for itag 43 is always 2147483647
- 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
- 'width': int_or_none(fmt.get('width')),
- }
-
- for fmt in streaming_formats:
- if fmt.get('drm_families'):
- continue
- url = url_or_none(fmt.get('url'))
-
- if not url:
- cipher = fmt.get('cipher')
- if not cipher:
- continue
- url_data = compat_parse_qs(cipher)
- url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
- if not url:
- continue
- else:
- cipher = None
- url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-
- stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
- # Unsupported FORMAT_STREAM_TYPE_OTF
- if stream_type == 3:
- continue
-
- format_id = fmt.get('itag') or url_data['itag'][0]
- if not format_id:
- continue
- format_id = compat_str(format_id)
-
- if cipher:
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
-
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
- url += '&%s=%s' % (sp, signature)
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
-
- dct = {
- 'format_id': format_id,
- 'url': url,
- 'player_url': player_url,
- }
- if format_id in self._formats:
- dct.update(self._formats[format_id])
- if format_id in formats_spec:
- dct.update(formats_spec[format_id])
-
- # Some itags are not included in DASH manifest thus corresponding formats will
- # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
- # Trying to extract metadata from url_encoded_fmt_stream_map entry.
- mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
- width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
-
- if width is None:
- width = int_or_none(fmt.get('width'))
- if height is None:
- height = int_or_none(fmt.get('height'))
-
- filesize = int_or_none(url_data.get(
- 'clen', [None])[0]) or _extract_filesize(url)
-
- quality = url_data.get('quality', [None])[0] or fmt.get('quality')
- quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
-
- tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
- or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
- fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
-
- more_fields = {
- 'filesize': filesize,
- 'tbr': tbr,
- 'width': width,
- 'height': height,
- 'fps': fps,
- 'format_note': quality_label or quality,
- }
- for key, value in more_fields.items():
- if value:
- dct[key] = value
- type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
- if type_:
- type_split = type_.split(';')
- kind_ext = type_split[0].split('/')
- if len(kind_ext) == 2:
- kind, _ = kind_ext
- dct['ext'] = mimetype2ext(type_split[0])
- if kind in ('audio', 'video'):
- codecs = None
- for mobj in re.finditer(
- r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
- if mobj.group('key') == 'codecs':
- codecs = mobj.group('val')
- break
- if codecs:
- dct.update(parse_codecs(codecs))
- if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
- dct['downloader_options'] = {
- # Youtube throttles chunks >~10M
- 'http_chunk_size': 10485760,
- }
- formats.append(dct)
- else:
- manifest_url = (
- url_or_none(try_get(
- player_response,
- lambda x: x['streamingData']['hlsManifestUrl'],
- compat_str))
- or url_or_none(try_get(
- video_info, lambda x: x['hlsvp'][0], compat_str)))
- if manifest_url:
- formats = []
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', fatal=False)
- for a_format in m3u8_formats:
- itag = self._search_regex(
- r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
- if itag:
- a_format['format_id'] = itag
- if itag in self._formats:
- dct = self._formats[itag].copy()
- dct.update(a_format)
- a_format = dct
- a_format['player_url'] = player_url
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
- formats.append(a_format)
- else:
- error_message = extract_unavailable_message()
- if not error_message:
- error_message = clean_html(try_get(
- player_response, lambda x: x['playabilityStatus']['reason'],
- compat_str))
- if not error_message:
- error_message = clean_html(
- try_get(video_info, lambda x: x['reason'][0], compat_str))
- if error_message:
- raise ExtractorError(error_message, expected=True)
- raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
-
- # uploader
- video_uploader = try_get(
- video_info, lambda x: x['author'][0],
- compat_str) or str_or_none(video_details.get('author'))
- if video_uploader:
- video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
- else:
- self._downloader.report_warning('unable to extract uploader name')
-
- # uploader_id
- video_uploader_id = None
- video_uploader_url = None
- mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
- video_webpage)
- if mobj is not None:
- video_uploader_id = mobj.group('uploader_id')
- video_uploader_url = mobj.group('uploader_url')
- else:
- self._downloader.report_warning('unable to extract uploader nickname')
-
- channel_id = (
- str_or_none(video_details.get('channelId'))
- or self._html_search_meta(
- 'channelId', video_webpage, 'channel id', default=None)
- or self._search_regex(
- r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- video_webpage, 'channel id', default=None, group='id'))
- channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
-
- # thumbnail image
- # We try first to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
- video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
- # upload date
- upload_date = self._html_search_meta(
- 'datePublished', video_webpage, 'upload date', default=None)
- if not upload_date:
- upload_date = self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
- video_webpage, 'upload date', default=None)
- upload_date = unified_strdate(upload_date)
-
- video_license = self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
- video_webpage, 'license', default=None)
-
- m_music = re.search(
- r'''(?x)
- <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
- <ul[^>]*>\s*
- <li>(?P<title>.+?)
- by (?P<creator>.+?)
- (?:
- \(.+?\)|
- <a[^>]*
- (?:
- \bhref=["\']/red[^>]*>| # drop possible
- >\s*Listen ad-free with YouTube Red # YouTube Red ad
- )
- .*?
- )?</li
- ''',
- video_webpage)
- if m_music:
- video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
- video_creator = clean_html(m_music.group('creator'))
- else:
- video_alt_title = video_creator = None
-
- def extract_meta(field):
- return self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
- video_webpage, field, default=None)
-
- track = extract_meta('Song')
- artist = extract_meta('Artist')
- album = extract_meta('Album')
-
- # Youtube Music Auto-generated description
- release_date = release_year = None
- if video_description:
- mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
- if mobj:
- if not track:
- track = mobj.group('track').strip()
- if not artist:
- artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
- if not album:
- album = mobj.group('album'.strip())
- release_year = mobj.group('release_year')
- release_date = mobj.group('release_date')
- if release_date:
- release_date = release_date.replace('-', '')
- if not release_year:
- release_year = int(release_date[:4])
- if release_year:
- release_year = int(release_year)
-
- m_episode = re.search(
- r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
- video_webpage)
- if m_episode:
- series = unescapeHTML(m_episode.group('series'))
- season_number = int(m_episode.group('season'))
- episode_number = int(m_episode.group('episode'))
- else:
- series = season_number = episode_number = None
-
- m_cat_container = self._search_regex(
- r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', default=None)
- if m_cat_container:
- category = self._html_search_regex(
- r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
- default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
-
- video_tags = [
- unescapeHTML(m.group('content'))
- for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
- def _extract_count(count_name):
- return str_to_int(self._search_regex(
- r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
- % re.escape(count_name),
- video_webpage, count_name, default=None))
-
- like_count = _extract_count('like')
- dislike_count = _extract_count('dislike')
-
- if view_count is None:
- view_count = str_to_int(self._search_regex(
- r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
- 'view count', default=None))
-
- average_rating = (
- float_or_none(video_details.get('averageRating'))
- or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
- automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
-
- video_duration = try_get(
- video_info, lambda x: int_or_none(x['length_seconds'][0]))
- if not video_duration:
- video_duration = int_or_none(video_details.get('lengthSeconds'))
- if not video_duration:
- video_duration = parse_duration(self._html_search_meta(
- 'duration', video_webpage, 'video duration'))
-
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
- xsrf_token = self._search_regex(
- r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
- video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
- invideo_url = try_get(
- player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
- if xsrf_token and invideo_url:
- xsrf_field_name = self._search_regex(
- r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
- video_webpage, 'xsrf field name',
- group='xsrf_field_name', default='session_token')
- video_annotations = self._download_webpage(
- self._proto_relative_url(invideo_url),
- video_id, note='Downloading annotations',
- errnote='Unable to download video annotations', fatal=False,
- data=urlencode_postdata({xsrf_field_name: xsrf_token}))
-
- chapters = self._extract_chapters(description_original, video_duration)
-
- # Look for the DASH manifest
- if self._downloader.params.get('youtube_include_dash_manifest', True):
- dash_mpd_fatal = True
- for mpd_url in dash_mpds:
- dash_formats = {}
- try:
- def decrypt_sig(mobj):
- s = mobj.group(1)
- dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
- return '/signature/%s' % dec_s
-
- mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
-
- for df in self._extract_mpd_formats(
- mpd_url, video_id, fatal=dash_mpd_fatal,
- formats_dict=self._formats):
- if not df.get('filesize'):
- df['filesize'] = _extract_filesize(df['url'])
- # Do not overwrite DASH format found in some previous DASH manifest
- if df['format_id'] not in dash_formats:
- dash_formats[df['format_id']] = df
- # Additional DASH manifests may end up in HTTP Error 403 therefore
- # allow them to fail without bug report message if we already have
- # some DASH manifest succeeded. This is temporary workaround to reduce
- # burst of bug reports until we figure out the reason and whether it
- # can be fixed at all.
- dash_mpd_fatal = False
- except (ExtractorError, KeyError) as e:
- self.report_warning(
- 'Skipping DASH manifest: %r' % e, video_id)
- if dash_formats:
- # Remove the formats we found through non-DASH, they
- # contain less info and it can be wrong, because we use
- # fixed values (for example the resolution). See
- # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
- # example.
- formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
- formats.extend(dash_formats.values())
-
- # Check for malformed aspect ratio
- stretched_m = re.search(
- r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
- video_webpage)
- if stretched_m:
- w = float(stretched_m.group('w'))
- h = float(stretched_m.group('h'))
- # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
- # We will only process correct ratios.
- if w > 0 and h > 0:
- ratio = w / h
- for f in formats:
- if f.get('vcodec') != 'none':
- f['stretched_ratio'] = ratio
-
- if not formats:
- token = extract_token(video_info)
- if not token:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise ExtractorError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
-
- if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- self._sort_formats(formats)
-
- self.mark_watched(video_id, video_info, player_response)
-
- return {
- 'id': video_id,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
- 'uploader_url': video_uploader_url,
- 'channel_id': channel_id,
- 'channel_url': channel_url,
- 'upload_date': upload_date,
- 'license': video_license,
- 'creator': video_creator or artist,
- 'title': video_title,
- 'alt_title': video_alt_title or track,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'categories': video_categories,
- 'tags': video_tags,
- 'subtitles': video_subtitles,
- 'automatic_captions': automatic_captions,
- 'duration': video_duration,
- 'age_limit': 18 if age_gate else 0,
- 'annotations': video_annotations,
- 'chapters': chapters,
- 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'average_rating': average_rating,
- 'formats': formats,
- 'is_live': is_live,
- 'start_time': start_time,
- 'end_time': end_time,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'track': track,
- 'artist': artist,
- 'album': album,
- 'release_date': release_date,
- 'release_year': release_year,
- }
-
-
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs and bare playlist ids.

    Handles regular playlists, generated mixes (ids starting with RD, UL
    or PU) and video URLs that carry a ``list=`` parameter.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login step before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in a playlist page.

        Tags carrying a ``data-video-id`` attribute are read first; the
        regex-based fallbacks are then applied to the same accumulator
        lists through ``extract_videos_from_page_impl``.

        Returns the result of zipping the id list with the title list.
        """
        video_ids = []
        video_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            attributes = extract_attributes(tag)
            found_id = attributes['data-video-id']
            title = unescapeHTML(attributes.get('data-title'))
            video_ids.append(found_id)
            # A missing/empty title is stored as-is; only real titles are trimmed
            video_titles.append(title.strip() if title else title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, video_ids, video_titles)

        return zip(video_ids, video_titles)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a generated mix.

        The mixes are generated from a single video; the id of the
        playlist is just 'RD' + video_id, so the last 11 characters of
        the playlist id seed the first request.
        """
        collected_ids = []
        seed_id = playlist_id[-11:]
        link_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        page_number = 0
        while True:
            page_number += 1
            mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                mix_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_number))
            # Fetch new pages until all the videos are repeated, it seems
            # that there are always 51 unique videos.
            fresh_ids = [
                candidate
                for candidate in orderedSet(re.findall(link_re, webpage))
                if candidate not in collected_ids]
            if not fresh_ids:
                break
            collected_ids.extend(fresh_ids)
            # Continue the walk from the last video discovered so far
            seed_id = collected_ids[-1]

        entries = self._ids_to_results(collected_ids)

        # The title is taken from the last downloaded page; the first
        # class name yielding a non-empty element wins.
        title_html = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_html = get_element_by_attribute('class', class_name, webpage)
            if title_html:
                break
        mix_title = clean_html(title_html)

        return self.playlist_result(entries, playlist_id, mix_title)

    def _extract_playlist(self, playlist_id):
        """Download a playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is
        False only when no title was found and the entry iterator
        yielded nothing.

        Raises ExtractorError (expected) when the page reports that the
        playlist does not exist, is private, or the parameters are
        invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                # Language chooser banner, not an error
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        details_prefix = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % details_prefix,
            page, 'uploader', default=None)
        owner_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % details_prefix,
            page)
        uploader_id = uploader_url = None
        if owner_link:
            uploader_id = owner_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, owner_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update(
            uploader=uploader,
            uploader_id=uploader_id,
            uploader_url=uploader_url)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether a playlist URL should yield a single video.

        Returns ``(video_id, result)``: both None when the URL names no
        video; ``result`` is a url_result only when the ``noplaylist``
        parameter is set, otherwise None.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Extract a playlist, a mix, or a single video from ``url``."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)
        return playlist
-
-
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs.

    Where possible the channel is handed over to the playlist extractor
    via its "uploads" playlist; otherwise the channel's videos page is
    scraped directly.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the videos of a channel as a playlist result."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Second attempt: read the id out of the app deep-link meta tags
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and delegate to the playlist extractor
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface any alert shown on the page
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
-
-
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user/custom-name channel URLs and the ``ytuser:`` keyword."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE class in this module does."""
        # This extractor's regex is very permissive and would match URLs
        # that belong to the other youtube extractors, so give every other
        # module-level Youtube*IE class the chance to claim the URL first.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the ``user``/``c`` path kind of ``url``."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit kind (and ytuser:) default to 'user'
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
-
-
-class YoutubeLiveIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
- IE_NAME = 'youtube:live'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
- 'info_dict': {
- 'id': 'a48o2S1cPoo',
- 'ext': 'mp4',
- 'title': 'The Young Turks - Live Main Show',
- 'uploader': 'The Young Turks',
- 'uploader_id': 'TheYoungTurks',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
- 'upload_date': '20150715',
- 'license': 'Standard YouTube License',
- 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
- 'categories': ['News & Politics'],
- 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/TheYoungTurks/live',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- base_url = mobj.group('base_url')
- webpage = self._download_webpage(url, channel_id, fatal=False)
- if webpage:
- page_type = self._og_search_property(
- 'type', webpage, 'page type', default='')
- video_id = self._html_search_meta(
- 'videoId', webpage, 'video id', default=None)
- if page_type.startswith('video') and video_id and re.match(
- r'^[0-9A-Za-z_-]{11}$', video_id):
- return self.url_result(video_id, YoutubeIE.ie_key())
- return self.url_result(base_url)
-
-
-class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
- IE_NAME = 'youtube:playlists'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
- 'playlist_mincount': 4,
- 'info_dict': {
- 'id': 'ThirstForScience',
- 'title': 'ThirstForScience',
- },
- }, {
- # with "Load more" button
- 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
- 'playlist_mincount': 70,
- 'info_dict': {
- 'id': 'igorkle1',
- 'title': 'Игорь Клейнер',
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
- 'playlist_mincount': 17,
- 'info_dict': {
- 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
- 'title': 'Chem Player',
- },
- 'skip': 'Blocked',
- }]
-
-
-class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com searches'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8.000.000 results
- _MAX_RESULTS = float('inf')
- IE_NAME = 'youtube:search'
- _SEARCH_KEY = 'ytsearch'
- _EXTRA_QUERY_ARGS = {}
- _TESTS = []
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
-
- videos = []
- limit = n
-
- url_query = {
- 'search_query': query.encode('utf-8'),
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
-
- for pagenum in itertools.count(1):
- data = self._download_json(
- result_url, video_id='query "%s"' % query,
- note='Downloading page %s' % pagenum,
- errnote='Unable to download API page',
- query={'spf': 'navigate'})
- html_content = data[1]['body']['content']
-
- if 'class="search-message' in html_content:
- raise ExtractorError(
- '[youtube] No video results', expected=True)
-
- new_videos = list(self._process_page(html_content))
- videos += new_videos
- if not new_videos or len(videos) > limit:
- break
- next_link = self._html_search_regex(
- r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
- html_content, 'next link', default=None)
- if next_link is None:
- break
- result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
-
- if len(videos) > n:
- videos = videos[:n]
- return self.playlist_result(videos, query)
-
-
-class YoutubeSearchDateIE(YoutubeSearchIE):
- IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
- _SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first'
- _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-
-
-class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com search URLs'
- IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
- _TESTS = [{
- 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'title': 'youtube-dl test video',
- }
- }, {
- 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._process_page(webpage), playlist_title=query)
-
-
-class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com (multi-season) shows'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
- IE_NAME = 'youtube:show'
- _TESTS = [{
- 'url': 'https://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'airdisasters',
- 'title': 'Air Disasters',
- }
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- return super(YoutubeShowIE, self)._real_extract(
- 'https://www.youtube.com/show/%s/playlists' % playlist_id)
-
-
-class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
- """
- Base class for feed extractors
- Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
- """
- _LOGIN_REQUIRED = True
-
- @property
- def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
-
- def _real_initialize(self):
- self._login()
-
- def _entries(self, page):
- # The extraction process is the same as for playlists, but the regex
- # for the video ids doesn't contain an index
- ids = []
- more_widget_html = content_html = page
- for page_num in itertools.count(1):
- matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-
- # 'recommended' feed has infinite 'load more' and each new portion spins
- # the same videos in (sometimes) slightly different order, so we'll check
- # for unicity and break when portion has no new videos
- new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
- if not new_ids:
- break
-
- ids.extend(new_ids)
-
- for entry in self._ids_to_results(new_ids):
- yield entry
-
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
-
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
- content_html = more['content_html']
- more_widget_html = more['load_more_widget_html']
-
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
- self._PLAYLIST_TITLE)
- return self.playlist_result(
- self._entries(page), playlist_title=self._PLAYLIST_TITLE)
-
-
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=WL',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- _, video = self._check_download_just_video(url, 'WL')
- if video:
- return video
- _, playlist = self._extract_playlist('WL')
- return playlist
-
-
-class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
- IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
- _LOGIN_REQUIRED = True
-
- def _real_extract(self, url):
- webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
- playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
- return self.url_result(playlist_id, 'YoutubePlaylist')
-
-
-class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
- _FEED_NAME = 'recommended'
- _PLAYLIST_TITLE = 'Youtube Recommended videos'
-
-
-class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
- _FEED_NAME = 'subscriptions'
- _PLAYLIST_TITLE = 'Youtube Subscriptions'
-
-
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
- _FEED_NAME = 'history'
- _PLAYLIST_TITLE = 'Youtube History'
-
-
-class YoutubeTruncatedURLIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_url'
- IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:https?://)?
- (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
- (?:watch\?(?:
- feature=[a-z_]+|
- annotation_id=annotation_[^&]+|
- x-yt-cl=[0-9]+|
- hl=[^&]*|
- t=[0-9]+
- )?
- |
- attribution_link\?a=[^&]+
- )
- $
- '''
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?feature=foo',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?hl=en-GB',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?t=2372',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- raise ExtractorError(
- 'Did you forget to quote the URL? Remember that & is a meta '
- 'character in most shells, so you want to put the URL in quotes, '
- 'like youtube-dl '
- '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
- ' or simply youtube-dl BaW_jenozKc .',
- expected=True)
-
-
-class YoutubeTruncatedIDIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_id'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- raise ExtractorError(
- 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
- expected=True)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
deleted file mode 100644
index 7bda596..0000000
--- a/youtube_dl/jsinterp.py
+++ /dev/null
@@ -1,262 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-import operator
-import re
-
-from .utils import (
- ExtractorError,
- remove_quotes,
-)
-
-_OPERATORS = [
- ('|', operator.or_),
- ('^', operator.xor),
- ('&', operator.and_),
- ('>>', operator.rshift),
- ('<<', operator.lshift),
- ('-', operator.sub),
- ('+', operator.add),
- ('%', operator.mod),
- ('/', operator.truediv),
- ('*', operator.mul),
-]
-_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
-
-_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
-
-
-class JSInterpreter(object):
- def __init__(self, code, objects=None):
- if objects is None:
- objects = {}
- self.code = code
- self._functions = {}
- self._objects = objects
-
- def interpret_statement(self, stmt, local_vars, allow_recursion=100):
- if allow_recursion < 0:
- raise ExtractorError('Recursion limit reached')
-
- should_abort = False
- stmt = stmt.lstrip()
- stmt_m = re.match(r'var\s', stmt)
- if stmt_m:
- expr = stmt[len(stmt_m.group(0)):]
- else:
- return_m = re.match(r'return(?:\s+|$)', stmt)
- if return_m:
- expr = stmt[len(return_m.group(0)):]
- should_abort = True
- else:
- # Try interpreting it as an expression
- expr = stmt
-
- v = self.interpret_expression(expr, local_vars, allow_recursion)
- return v, should_abort
-
- def interpret_expression(self, expr, local_vars, allow_recursion):
- expr = expr.strip()
- if expr == '': # Empty expression
- return None
-
- if expr.startswith('('):
- parens_count = 0
- for m in re.finditer(r'[()]', expr):
- if m.group(0) == '(':
- parens_count += 1
- else:
- parens_count -= 1
- if parens_count == 0:
- sub_expr = expr[1:m.start()]
- sub_result = self.interpret_expression(
- sub_expr, local_vars, allow_recursion)
- remaining_expr = expr[m.end():].strip()
- if not remaining_expr:
- return sub_result
- else:
- expr = json.dumps(sub_result) + remaining_expr
- break
- else:
- raise ExtractorError('Premature end of parens in %r' % expr)
-
- for op, opfunc in _ASSIGN_OPERATORS:
- m = re.match(r'''(?x)
- (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
- \s*%s
- (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
- if not m:
- continue
- right_val = self.interpret_expression(
- m.group('expr'), local_vars, allow_recursion - 1)
-
- if m.groupdict().get('index'):
- lvar = local_vars[m.group('out')]
- idx = self.interpret_expression(
- m.group('index'), local_vars, allow_recursion)
- assert isinstance(idx, int)
- cur = lvar[idx]
- val = opfunc(cur, right_val)
- lvar[idx] = val
- return val
- else:
- cur = local_vars.get(m.group('out'))
- val = opfunc(cur, right_val)
- local_vars[m.group('out')] = val
- return val
-
- if expr.isdigit():
- return int(expr)
-
- var_m = re.match(
- r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
- expr)
- if var_m:
- return local_vars[var_m.group('name')]
-
- try:
- return json.loads(expr)
- except ValueError:
- pass
-
- m = re.match(
- r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
- if m:
- val = local_vars[m.group('in')]
- idx = self.interpret_expression(
- m.group('idx'), local_vars, allow_recursion - 1)
- return val[idx]
-
- m = re.match(
- r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
- expr)
- if m:
- variable = m.group('var')
- member = remove_quotes(m.group('member') or m.group('member2'))
- arg_str = m.group('args')
-
- if variable in local_vars:
- obj = local_vars[variable]
- else:
- if variable not in self._objects:
- self._objects[variable] = self.extract_object(variable)
- obj = self._objects[variable]
-
- if arg_str is None:
- # Member access
- if member == 'length':
- return len(obj)
- return obj[member]
-
- assert expr.endswith(')')
- # Function call
- if arg_str == '':
- argvals = tuple()
- else:
- argvals = tuple([
- self.interpret_expression(v, local_vars, allow_recursion)
- for v in arg_str.split(',')])
-
- if member == 'split':
- assert argvals == ('',)
- return list(obj)
- if member == 'join':
- assert len(argvals) == 1
- return argvals[0].join(obj)
- if member == 'reverse':
- assert len(argvals) == 0
- obj.reverse()
- return obj
- if member == 'slice':
- assert len(argvals) == 1
- return obj[argvals[0]:]
- if member == 'splice':
- assert isinstance(obj, list)
- index, howMany = argvals
- res = []
- for i in range(index, min(index + howMany, len(obj))):
- res.append(obj.pop(index))
- return res
-
- return obj[member](argvals)
-
- for op, opfunc in _OPERATORS:
- m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
- if not m:
- continue
- x, abort = self.interpret_statement(
- m.group('x'), local_vars, allow_recursion - 1)
- if abort:
- raise ExtractorError(
- 'Premature left-side return of %s in %r' % (op, expr))
- y, abort = self.interpret_statement(
- m.group('y'), local_vars, allow_recursion - 1)
- if abort:
- raise ExtractorError(
- 'Premature right-side return of %s in %r' % (op, expr))
- return opfunc(x, y)
-
- m = re.match(
- r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
- if m:
- fname = m.group('func')
- argvals = tuple([
- int(v) if v.isdigit() else local_vars[v]
- for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
- if fname not in self._functions:
- self._functions[fname] = self.extract_function(fname)
- return self._functions[fname](argvals)
-
- raise ExtractorError('Unsupported JS expression %r' % expr)
-
- def extract_object(self, objname):
- _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
- obj = {}
- obj_m = re.search(
- r'''(?x)
- (?<!this\.)%s\s*=\s*{\s*
- (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
- }\s*;
- ''' % (re.escape(objname), _FUNC_NAME_RE),
- self.code)
- fields = obj_m.group('fields')
- # Currently, it only supports function definitions
- fields_m = re.finditer(
- r'''(?x)
- (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
- ''' % _FUNC_NAME_RE,
- fields)
- for f in fields_m:
- argnames = f.group('args').split(',')
- obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
-
- return obj
-
- def extract_function(self, funcname):
- func_m = re.search(
- r'''(?x)
- (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
- \((?P<args>[^)]*)\)\s*
- \{(?P<code>[^}]+)\}''' % (
- re.escape(funcname), re.escape(funcname), re.escape(funcname)),
- self.code)
- if func_m is None:
- raise ExtractorError('Could not find JS function %r' % funcname)
- argnames = func_m.group('args').split(',')
-
- return self.build_function(argnames, func_m.group('code'))
-
- def call_function(self, funcname, *args):
- f = self.extract_function(funcname)
- return f(args)
-
- def build_function(self, argnames, code):
- def resf(args):
- local_vars = dict(zip(argnames, args))
- for stmt in code.split(';'):
- res, abort = self.interpret_statement(stmt, local_vars)
- if abort:
- break
- return res
- return resf
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
deleted file mode 100644
index e7d8e89..0000000
--- a/youtube_dl/options.py
+++ /dev/null
@@ -1,916 +0,0 @@
-from __future__ import unicode_literals
-
-import os.path
-import optparse
-import re
-import sys
-
-from .downloader.external import list_external_downloaders
-from .compat import (
- compat_expanduser,
- compat_get_terminal_size,
- compat_getenv,
- compat_kwargs,
- compat_shlex_split,
-)
-from .utils import (
- preferredencoding,
- write_string,
-)
-from .version import __version__
-
-
-def _hide_login_info(opts):
- PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
- eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
-
- def _scrub_eq(o):
- m = eqre.match(o)
- if m:
- return m.group('key') + '=PRIVATE'
- else:
- return o
-
- opts = list(map(_scrub_eq, opts))
- for idx, opt in enumerate(opts):
- if opt in PRIVATE_OPTS and idx + 1 < len(opts):
- opts[idx + 1] = 'PRIVATE'
- return opts
-
-
-def parseOpts(overrideArguments=None):
- def _readOptions(filename_bytes, default=[]):
- try:
- optionf = open(filename_bytes)
- except IOError:
- return default # silently skip if file is not present
- try:
- # FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
- contents = optionf.read()
- if sys.version_info < (3,):
- contents = contents.decode(preferredencoding())
- res = compat_shlex_split(contents, comments=True)
- finally:
- optionf.close()
- return res
-
- def _readUserConf():
- xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
- if xdg_config_home:
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
- else:
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
- userConf = _readOptions(userConfFile, None)
-
- if userConf is None:
- appdata_dir = compat_getenv('appdata')
- if appdata_dir:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'youtube-dl', 'config'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
- default=None)
-
- if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
- default=None)
-
- if userConf is None:
- userConf = []
-
- return userConf
-
- def _format_option_string(option):
- ''' ('-o', '--option') -> -o, --format METAVAR'''
-
- opts = []
-
- if option._short_opts:
- opts.append(option._short_opts[0])
- if option._long_opts:
- opts.append(option._long_opts[0])
- if len(opts) > 1:
- opts.insert(1, ', ')
-
- if option.takes_value():
- opts.append(' %s' % option.metavar)
-
- return ''.join(opts)
-
- def _comma_separated_values_options_callback(option, opt_str, value, parser):
- setattr(parser.values, option.dest, value.split(','))
-
- # No need to wrap help messages if we're on a wide console
- columns = compat_get_terminal_size().columns
- max_width = columns if columns else 80
- max_help_position = 80
-
- fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
- fmt.format_option_strings = _format_option_string
-
- kw = {
- 'version': __version__,
- 'formatter': fmt,
- 'usage': '%prog [OPTIONS] URL [URL...]',
- 'conflict_handler': 'resolve',
- }
-
- parser = optparse.OptionParser(**compat_kwargs(kw))
-
- general = optparse.OptionGroup(parser, 'General Options')
- general.add_option(
- '-h', '--help',
- action='help',
- help='Print this help text and exit')
- general.add_option(
- '-v', '--version',
- action='version',
- help='Print program version and exit')
- general.add_option(
- '-U', '--update',
- action='store_true', dest='update_self',
- help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
- general.add_option(
- '-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', default=False,
- help='Continue on download errors, for example to skip unavailable videos in a playlist')
- general.add_option(
- '--abort-on-error',
- action='store_false', dest='ignoreerrors',
- help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
- general.add_option(
- '--dump-user-agent',
- action='store_true', dest='dump_user_agent', default=False,
- help='Display the current browser identification')
- general.add_option(
- '--list-extractors',
- action='store_true', dest='list_extractors', default=False,
- help='List all supported extractors')
- general.add_option(
- '--extractor-descriptions',
- action='store_true', dest='list_extractor_descriptions', default=False,
- help='Output descriptions of all supported extractors')
- general.add_option(
- '--force-generic-extractor',
- action='store_true', dest='force_generic_extractor', default=False,
- help='Force extraction to use the generic extractor')
- general.add_option(
- '--default-search',
- dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
- general.add_option(
- '--ignore-config',
- action='store_true',
- help='Do not read configuration files. '
- 'When given in the global configuration file /etc/youtube-dl.conf: '
- 'Do not read the user configuration in ~/.config/youtube-dl/config '
- '(%APPDATA%/youtube-dl/config.txt on Windows)')
- general.add_option(
- '--config-location',
- dest='config_location', metavar='PATH',
- help='Location of the configuration file; either the path to the config or its containing directory.')
- general.add_option(
- '--flat-playlist',
- action='store_const', dest='extract_flat', const='in_playlist',
- default=False,
- help='Do not extract the videos of a playlist, only list them.')
- general.add_option(
- '--mark-watched',
- action='store_true', dest='mark_watched', default=False,
- help='Mark videos watched (YouTube only)')
- general.add_option(
- '--no-mark-watched',
- action='store_false', dest='mark_watched', default=False,
- help='Do not mark videos watched (YouTube only)')
- general.add_option(
- '--no-color', '--no-colors',
- action='store_true', dest='no_color',
- default=False,
- help='Do not emit color codes in output')
-
- network = optparse.OptionGroup(parser, 'Network Options')
- network.add_option(
- '--proxy', dest='proxy',
- default=None, metavar='URL',
- help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
- 'SOCKS proxy, specify a proper scheme. For example '
- 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
- 'for direct connection')
- network.add_option(
- '--socket-timeout',
- dest='socket_timeout', type=float, default=None, metavar='SECONDS',
- help='Time to wait before giving up, in seconds')
- network.add_option(
- '--source-address',
- metavar='IP', dest='source_address', default=None,
- help='Client-side IP address to bind to',
- )
- network.add_option(
- '-4', '--force-ipv4',
- action='store_const', const='0.0.0.0', dest='source_address',
- help='Make all connections via IPv4',
- )
- network.add_option(
- '-6', '--force-ipv6',
- action='store_const', const='::', dest='source_address',
- help='Make all connections via IPv6',
- )
-
- geo = optparse.OptionGroup(parser, 'Geo Restriction')
- geo.add_option(
- '--geo-verification-proxy',
- dest='geo_verification_proxy', default=None, metavar='URL',
- help='Use this proxy to verify the IP address for some geo-restricted sites. '
- 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
- geo.add_option(
- '--cn-verification-proxy',
- dest='cn_verification_proxy', default=None, metavar='URL',
- help=optparse.SUPPRESS_HELP)
- geo.add_option(
- '--geo-bypass',
- action='store_true', dest='geo_bypass', default=True,
- help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
- geo.add_option(
- '--no-geo-bypass',
- action='store_false', dest='geo_bypass', default=True,
- help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
- geo.add_option(
- '--geo-bypass-country', metavar='CODE',
- dest='geo_bypass_country', default=None,
- help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
- geo.add_option(
- '--geo-bypass-ip-block', metavar='IP_BLOCK',
- dest='geo_bypass_ip_block', default=None,
- help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
-
- selection = optparse.OptionGroup(parser, 'Video Selection')
- selection.add_option(
- '--playlist-start',
- dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='Playlist video to start at (default is %default)')
- selection.add_option(
- '--playlist-end',
- dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='Playlist video to end at (default is last)')
- selection.add_option(
- '--playlist-items',
- dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
- selection.add_option(
- '--match-title',
- dest='matchtitle', metavar='REGEX',
- help='Download only matching titles (regex or caseless sub-string)')
- selection.add_option(
- '--reject-title',
- dest='rejecttitle', metavar='REGEX',
- help='Skip download for matching titles (regex or caseless sub-string)')
- selection.add_option(
- '--max-downloads',
- dest='max_downloads', metavar='NUMBER', type=int, default=None,
- help='Abort after downloading NUMBER files')
- selection.add_option(
- '--min-filesize',
- metavar='SIZE', dest='min_filesize', default=None,
- help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
- selection.add_option(
- '--max-filesize',
- metavar='SIZE', dest='max_filesize', default=None,
- help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
- selection.add_option(
- '--date',
- metavar='DATE', dest='date', default=None,
- help='Download only videos uploaded in this date')
- selection.add_option(
- '--datebefore',
- metavar='DATE', dest='datebefore', default=None,
- help='Download only videos uploaded on or before this date (i.e. inclusive)')
- selection.add_option(
- '--dateafter',
- metavar='DATE', dest='dateafter', default=None,
- help='Download only videos uploaded on or after this date (i.e. inclusive)')
- selection.add_option(
- '--min-views',
- metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with less than COUNT views')
- selection.add_option(
- '--max-views',
- metavar='COUNT', dest='max_views', default=None, type=int,
- help='Do not download any videos with more than COUNT views')
- selection.add_option(
- '--match-filter',
- metavar='FILTER', dest='match_filter', default=None,
- help=(
- 'Generic video filter. '
- 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
- 'match if the key is present, '
- '!key to check if the key is not present, '
- 'key > NUMBER (like "comment_count > 12", also works with '
- '>=, <, <=, !=, =) to compare against a number, '
- 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
- 'to match against a string literal '
- 'and & to require multiple matches. '
- 'Values which are not known are excluded unless you '
- 'put a question mark (?) after the operator. '
- 'For example, to only match videos that have been liked more than '
- '100 times and disliked less than 50 times (or the dislike '
- 'functionality is not available at the given service), but who '
- 'also have a description, use --match-filter '
- '"like_count > 100 & dislike_count <? 50 & description" .'
- ))
- selection.add_option(
- '--no-playlist',
- action='store_true', dest='noplaylist', default=False,
- help='Download only the video, if the URL refers to a video and a playlist.')
- selection.add_option(
- '--yes-playlist',
- action='store_false', dest='noplaylist', default=False,
- help='Download the playlist, if the URL refers to a video and a playlist.')
- selection.add_option(
- '--age-limit',
- metavar='YEARS', dest='age_limit', default=None, type=int,
- help='Download only videos suitable for the given age')
- selection.add_option(
- '--download-archive', metavar='FILE',
- dest='download_archive',
- help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
- selection.add_option(
- '--include-ads',
- dest='include_ads', action='store_true',
- help='Download advertisements as well (experimental)')
-
- authentication = optparse.OptionGroup(parser, 'Authentication Options')
- authentication.add_option(
- '-u', '--username',
- dest='username', metavar='USERNAME',
- help='Login with this account ID')
- authentication.add_option(
- '-p', '--password',
- dest='password', metavar='PASSWORD',
- help='Account password. If this option is left out, youtube-dl will ask interactively.')
- authentication.add_option(
- '-2', '--twofactor',
- dest='twofactor', metavar='TWOFACTOR',
- help='Two-factor authentication code')
- authentication.add_option(
- '-n', '--netrc',
- action='store_true', dest='usenetrc', default=False,
- help='Use .netrc authentication data')
- authentication.add_option(
- '--video-password',
- dest='videopassword', metavar='PASSWORD',
- help='Video password (vimeo, smotri, youku)')
-
- adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
- adobe_pass.add_option(
- '--ap-mso',
- dest='ap_mso', metavar='MSO',
- help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
- adobe_pass.add_option(
- '--ap-username',
- dest='ap_username', metavar='USERNAME',
- help='Multiple-system operator account login')
- adobe_pass.add_option(
- '--ap-password',
- dest='ap_password', metavar='PASSWORD',
- help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.')
- adobe_pass.add_option(
- '--ap-list-mso',
- action='store_true', dest='ap_list_mso', default=False,
- help='List all supported multiple-system operators')
-
- video_format = optparse.OptionGroup(parser, 'Video Format Options')
- video_format.add_option(
- '-f', '--format',
- action='store', dest='format', metavar='FORMAT', default=None,
- help='Video format code, see the "FORMAT SELECTION" for all the info')
- video_format.add_option(
- '--all-formats',
- action='store_const', dest='format', const='all',
- help='Download all available video formats')
- video_format.add_option(
- '--prefer-free-formats',
- action='store_true', dest='prefer_free_formats', default=False,
- help='Prefer free video formats unless a specific one is requested')
- video_format.add_option(
- '-F', '--list-formats',
- action='store_true', dest='listformats',
- help='List all available formats of requested videos')
- video_format.add_option(
- '--youtube-include-dash-manifest',
- action='store_true', dest='youtube_include_dash_manifest', default=True,
- help=optparse.SUPPRESS_HELP)
- video_format.add_option(
- '--youtube-skip-dash-manifest',
- action='store_false', dest='youtube_include_dash_manifest',
- help='Do not download the DASH manifests and related data on YouTube videos')
- video_format.add_option(
- '--merge-output-format',
- action='store', dest='merge_output_format', metavar='FORMAT', default=None,
- help=(
- 'If a merge is required (e.g. bestvideo+bestaudio), '
- 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
- 'Ignored if no merge is required'))
-
- subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
- subtitles.add_option(
- '--write-sub', '--write-srt',
- action='store_true', dest='writesubtitles', default=False,
- help='Write subtitle file')
- subtitles.add_option(
- '--write-auto-sub', '--write-automatic-sub',
- action='store_true', dest='writeautomaticsub', default=False,
- help='Write automatically generated subtitle file (YouTube only)')
- subtitles.add_option(
- '--all-subs',
- action='store_true', dest='allsubtitles', default=False,
- help='Download all the available subtitles of the video')
- subtitles.add_option(
- '--list-subs',
- action='store_true', dest='listsubtitles', default=False,
- help='List all available subtitles for the video')
- subtitles.add_option(
- '--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
- subtitles.add_option(
- '--sub-lang', '--sub-langs', '--srt-lang',
- action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
- default=[], callback=_comma_separated_values_options_callback,
- help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
-
- downloader = optparse.OptionGroup(parser, 'Download Options')
- downloader.add_option(
- '-r', '--limit-rate', '--rate-limit',
- dest='ratelimit', metavar='RATE',
- help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
- downloader.add_option(
- '-R', '--retries',
- dest='retries', metavar='RETRIES', default=10,
- help='Number of retries (default is %default), or "infinite".')
- downloader.add_option(
- '--fragment-retries',
- dest='fragment_retries', metavar='RETRIES', default=10,
- help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
- downloader.add_option(
- '--skip-unavailable-fragments',
- action='store_true', dest='skip_unavailable_fragments', default=True,
- help='Skip unavailable fragments (DASH, hlsnative and ISM)')
- downloader.add_option(
- '--abort-on-unavailable-fragment',
- action='store_false', dest='skip_unavailable_fragments',
- help='Abort downloading when some fragment is not available')
- downloader.add_option(
- '--keep-fragments',
- action='store_true', dest='keep_fragments', default=False,
- help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
- downloader.add_option(
- '--buffer-size',
- dest='buffersize', metavar='SIZE', default='1024',
- help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
- downloader.add_option(
- '--no-resize-buffer',
- action='store_true', dest='noresizebuffer', default=False,
- help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
- downloader.add_option(
- '--http-chunk-size',
- dest='http_chunk_size', metavar='SIZE', default=None,
- help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
- 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
- downloader.add_option(
- '--test',
- action='store_true', dest='test', default=False,
- help=optparse.SUPPRESS_HELP)
- downloader.add_option(
- '--playlist-reverse',
- action='store_true',
- help='Download playlist videos in reverse order')
- downloader.add_option(
- '--playlist-random',
- action='store_true',
- help='Download playlist videos in random order')
- downloader.add_option(
- '--xattr-set-filesize',
- dest='xattr_set_filesize', action='store_true',
- help='Set file xattribute ytdl.filesize with expected file size')
- downloader.add_option(
- '--hls-prefer-native',
- dest='hls_prefer_native', action='store_true', default=None,
- help='Use the native HLS downloader instead of ffmpeg')
- downloader.add_option(
- '--hls-prefer-ffmpeg',
- dest='hls_prefer_native', action='store_false', default=None,
- help='Use ffmpeg instead of the native HLS downloader')
- downloader.add_option(
- '--hls-use-mpegts',
- dest='hls_use_mpegts', action='store_true',
- help='Use the mpegts container for HLS videos, allowing to play the '
- 'video while downloading (some players may not be able to play it)')
- downloader.add_option(
- '--external-downloader',
- dest='external_downloader', metavar='COMMAND',
- help='Use the specified external downloader. '
- 'Currently supports %s' % ','.join(list_external_downloaders()))
- downloader.add_option(
- '--external-downloader-args',
- dest='external_downloader_args', metavar='ARGS',
- help='Give these arguments to the external downloader')
-
- workarounds = optparse.OptionGroup(parser, 'Workarounds')
- workarounds.add_option(
- '--encoding',
- dest='encoding', metavar='ENCODING',
- help='Force the specified encoding (experimental)')
- workarounds.add_option(
- '--no-check-certificate',
- action='store_true', dest='no_check_certificate', default=False,
- help='Suppress HTTPS certificate validation')
- workarounds.add_option(
- '--prefer-insecure',
- '--prefer-unsecure', action='store_true', dest='prefer_insecure',
- help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
- workarounds.add_option(
- '--user-agent',
- metavar='UA', dest='user_agent',
- help='Specify a custom user agent')
- workarounds.add_option(
- '--referer',
- metavar='URL', dest='referer', default=None,
- help='Specify a custom referer, use if the video access is restricted to one domain',
- )
- workarounds.add_option(
- '--add-header',
- metavar='FIELD:VALUE', dest='headers', action='append',
- help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
- )
- workarounds.add_option(
- '--bidi-workaround',
- dest='bidi_workaround', action='store_true',
- help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
- workarounds.add_option(
- '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
- dest='sleep_interval', type=float,
- help=(
- 'Number of seconds to sleep before each download when used alone '
- 'or a lower bound of a range for randomized sleep before each download '
- '(minimum possible number of seconds to sleep) when used along with '
- '--max-sleep-interval.'))
- workarounds.add_option(
- '--max-sleep-interval', metavar='SECONDS',
- dest='max_sleep_interval', type=float,
- help=(
- 'Upper bound of a range for randomized sleep before each download '
- '(maximum possible number of seconds to sleep). Must only be used '
- 'along with --min-sleep-interval.'))
-
- verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
- verbosity.add_option(
- '-q', '--quiet',
- action='store_true', dest='quiet', default=False,
- help='Activate quiet mode')
- verbosity.add_option(
- '--no-warnings',
- dest='no_warnings', action='store_true', default=False,
- help='Ignore warnings')
- verbosity.add_option(
- '-s', '--simulate',
- action='store_true', dest='simulate', default=False,
- help='Do not download the video and do not write anything to disk')
- verbosity.add_option(
- '--skip-download',
- action='store_true', dest='skip_download', default=False,
- help='Do not download the video')
- verbosity.add_option(
- '-g', '--get-url',
- action='store_true', dest='geturl', default=False,
- help='Simulate, quiet but print URL')
- verbosity.add_option(
- '-e', '--get-title',
- action='store_true', dest='gettitle', default=False,
- help='Simulate, quiet but print title')
- verbosity.add_option(
- '--get-id',
- action='store_true', dest='getid', default=False,
- help='Simulate, quiet but print id')
- verbosity.add_option(
- '--get-thumbnail',
- action='store_true', dest='getthumbnail', default=False,
- help='Simulate, quiet but print thumbnail URL')
- verbosity.add_option(
- '--get-description',
- action='store_true', dest='getdescription', default=False,
- help='Simulate, quiet but print video description')
- verbosity.add_option(
- '--get-duration',
- action='store_true', dest='getduration', default=False,
- help='Simulate, quiet but print video length')
- verbosity.add_option(
- '--get-filename',
- action='store_true', dest='getfilename', default=False,
- help='Simulate, quiet but print output filename')
- verbosity.add_option(
- '--get-format',
- action='store_true', dest='getformat', default=False,
- help='Simulate, quiet but print output format')
- verbosity.add_option(
- '-j', '--dump-json',
- action='store_true', dest='dumpjson', default=False,
- help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
- verbosity.add_option(
- '-J', '--dump-single-json',
- action='store_true', dest='dump_single_json', default=False,
- help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
- verbosity.add_option(
- '--print-json',
- action='store_true', dest='print_json', default=False,
- help='Be quiet and print the video information as JSON (video is still being downloaded).',
- )
- verbosity.add_option(
- '--newline',
- action='store_true', dest='progress_with_newline', default=False,
- help='Output progress bar as new lines')
- verbosity.add_option(
- '--no-progress',
- action='store_true', dest='noprogress', default=False,
- help='Do not print progress bar')
- verbosity.add_option(
- '--console-title',
- action='store_true', dest='consoletitle', default=False,
- help='Display progress in console titlebar')
- verbosity.add_option(
- '-v', '--verbose',
- action='store_true', dest='verbose', default=False,
- help='Print various debugging information')
- verbosity.add_option(
- '--dump-pages', '--dump-intermediate-pages',
- action='store_true', dest='dump_intermediate_pages', default=False,
- help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
- verbosity.add_option(
- '--write-pages',
- action='store_true', dest='write_pages', default=False,
- help='Write downloaded intermediary pages to files in the current directory to debug problems')
- verbosity.add_option(
- '--youtube-print-sig-code',
- action='store_true', dest='youtube_print_sig_code', default=False,
- help=optparse.SUPPRESS_HELP)
- verbosity.add_option(
- '--print-traffic', '--dump-headers',
- dest='debug_printtraffic', action='store_true', default=False,
- help='Display sent and read HTTP traffic')
- verbosity.add_option(
- '-C', '--call-home',
- dest='call_home', action='store_true', default=False,
- help='Contact the youtube-dl server for debugging')
- verbosity.add_option(
- '--no-call-home',
- dest='call_home', action='store_false', default=False,
- help='Do NOT contact the youtube-dl server for debugging')
-
- filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
- filesystem.add_option(
- '-a', '--batch-file',
- dest='batchfile', metavar='FILE',
- help="File containing URLs to download ('-' for stdin), one URL per line. "
- "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
- filesystem.add_option(
- '--id', default=False,
- action='store_true', dest='useid', help='Use only video ID in file name')
- filesystem.add_option(
- '-o', '--output',
- dest='outtmpl', metavar='TEMPLATE',
- help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
- filesystem.add_option(
- '--autonumber-size',
- dest='autonumber_size', metavar='NUMBER', type=int,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '--autonumber-start',
- dest='autonumber_start', metavar='NUMBER', default=1, type=int,
- help='Specify the start value for %(autonumber)s (default is %default)')
- filesystem.add_option(
- '--restrict-filenames',
- action='store_true', dest='restrictfilenames', default=False,
- help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
- filesystem.add_option(
- '-A', '--auto-number',
- action='store_true', dest='autonumber', default=False,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-t', '--title',
- action='store_true', dest='usetitle', default=False,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-l', '--literal', default=False,
- action='store_true', dest='usetitle',
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-w', '--no-overwrites',
- action='store_true', dest='nooverwrites', default=False,
- help='Do not overwrite files')
- filesystem.add_option(
- '-c', '--continue',
- action='store_true', dest='continue_dl', default=True,
- help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
- filesystem.add_option(
- '--no-continue',
- action='store_false', dest='continue_dl',
- help='Do not resume partially downloaded files (restart from beginning)')
- filesystem.add_option(
- '--no-part',
- action='store_true', dest='nopart', default=False,
- help='Do not use .part files - write directly into output file')
- filesystem.add_option(
- '--no-mtime',
- action='store_false', dest='updatetime', default=True,
- help='Do not use the Last-modified header to set the file modification time')
- filesystem.add_option(
- '--write-description',
- action='store_true', dest='writedescription', default=False,
- help='Write video description to a .description file')
- filesystem.add_option(
- '--write-info-json',
- action='store_true', dest='writeinfojson', default=False,
- help='Write video metadata to a .info.json file')
- filesystem.add_option(
- '--write-annotations',
- action='store_true', dest='writeannotations', default=False,
- help='Write video annotations to a .annotations.xml file')
- filesystem.add_option(
- '--load-info-json', '--load-info',
- dest='load_info_filename', metavar='FILE',
- help='JSON file containing the video information (created with the "--write-info-json" option)')
- filesystem.add_option(
- '--cookies',
- dest='cookiefile', metavar='FILE',
- help='File to read cookies from and dump cookie jar in')
- filesystem.add_option(
- '--cache-dir', dest='cachedir', default=None, metavar='DIR',
- help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
- filesystem.add_option(
- '--no-cache-dir', action='store_const', const=False, dest='cachedir',
- help='Disable filesystem caching')
- filesystem.add_option(
- '--rm-cache-dir',
- action='store_true', dest='rm_cachedir',
- help='Delete all filesystem cache files')
-
- thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
- thumbnail.add_option(
- '--write-thumbnail',
- action='store_true', dest='writethumbnail', default=False,
- help='Write thumbnail image to disk')
- thumbnail.add_option(
- '--write-all-thumbnails',
- action='store_true', dest='write_all_thumbnails', default=False,
- help='Write all thumbnail image formats to disk')
- thumbnail.add_option(
- '--list-thumbnails',
- action='store_true', dest='list_thumbnails', default=False,
- help='Simulate and list all available thumbnail formats')
-
- postproc = optparse.OptionGroup(parser, 'Post-processing Options')
- postproc.add_option(
- '-x', '--extract-audio',
- action='store_true', dest='extractaudio', default=False,
- help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
- postproc.add_option(
- '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
- postproc.add_option(
- '--audio-quality', metavar='QUALITY',
- dest='audioquality', default='5',
- help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
- postproc.add_option(
- '--recode-video',
- metavar='FORMAT', dest='recodevideo', default=None,
- help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
- postproc.add_option(
- '--postprocessor-args',
- dest='postprocessor_args', metavar='ARGS',
- help='Give these arguments to the postprocessor')
- postproc.add_option(
- '-k', '--keep-video',
- action='store_true', dest='keepvideo', default=False,
- help='Keep the video file on disk after the post-processing; the video is erased by default')
- postproc.add_option(
- '--no-post-overwrites',
- action='store_true', dest='nopostoverwrites', default=False,
- help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
- postproc.add_option(
- '--embed-subs',
- action='store_true', dest='embedsubtitles', default=False,
- help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
- postproc.add_option(
- '--embed-thumbnail',
- action='store_true', dest='embedthumbnail', default=False,
- help='Embed thumbnail in the audio as cover art')
- postproc.add_option(
- '--add-metadata',
- action='store_true', dest='addmetadata', default=False,
- help='Write metadata to the video file')
- postproc.add_option(
- '--metadata-from-title',
- metavar='FORMAT', dest='metafromtitle',
- help='Parse additional metadata like song title / artist from the video title. '
- 'The format syntax is the same as --output. Regular expression with '
- 'named capture groups may also be used. '
- 'The parsed parameters replace existing values. '
- 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
- '"Coldplay - Paradise". '
- 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
- postproc.add_option(
- '--xattrs',
- action='store_true', dest='xattrs', default=False,
- help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
- postproc.add_option(
- '--fixup',
- metavar='POLICY', dest='fixup', default='detect_or_warn',
- help='Automatically correct known faults of the file. '
- 'One of never (do nothing), warn (only emit a warning), '
- 'detect_or_warn (the default; fix file if we can, warn otherwise)')
- postproc.add_option(
- '--prefer-avconv',
- action='store_false', dest='prefer_ffmpeg',
- help='Prefer avconv over ffmpeg for running the postprocessors')
- postproc.add_option(
- '--prefer-ffmpeg',
- action='store_true', dest='prefer_ffmpeg',
- help='Prefer ffmpeg over avconv for running the postprocessors (default)')
- postproc.add_option(
- '--ffmpeg-location', '--avconv-location', metavar='PATH',
- dest='ffmpeg_location',
- help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
- postproc.add_option(
- '--exec',
- metavar='CMD', dest='exec_cmd',
- help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
- postproc.add_option(
- '--convert-subs', '--convert-subtitles',
- metavar='FORMAT', dest='convertsubtitles', default=None,
- help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
-
- parser.add_option_group(general)
- parser.add_option_group(network)
- parser.add_option_group(geo)
- parser.add_option_group(selection)
- parser.add_option_group(downloader)
- parser.add_option_group(filesystem)
- parser.add_option_group(thumbnail)
- parser.add_option_group(verbosity)
- parser.add_option_group(workarounds)
- parser.add_option_group(video_format)
- parser.add_option_group(subtitles)
- parser.add_option_group(authentication)
- parser.add_option_group(adobe_pass)
- parser.add_option_group(postproc)
-
- if overrideArguments is not None:
- opts, args = parser.parse_args(overrideArguments)
- if opts.verbose:
- write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
- else:
- def compat_conf(conf):
- if sys.version_info < (3,):
- return [a.decode(preferredencoding(), 'replace') for a in conf]
- return conf
-
- command_line_conf = compat_conf(sys.argv[1:])
- opts, args = parser.parse_args(command_line_conf)
-
- system_conf = user_conf = custom_conf = []
-
- if '--config-location' in command_line_conf:
- location = compat_expanduser(opts.config_location)
- if os.path.isdir(location):
- location = os.path.join(location, 'youtube-dl.conf')
- if not os.path.exists(location):
- parser.error('config-location %s does not exist.' % location)
- custom_conf = _readOptions(location)
- elif '--ignore-config' in command_line_conf:
- pass
- else:
- system_conf = _readOptions('/etc/youtube-dl.conf')
- if '--ignore-config' not in system_conf:
- user_conf = _readUserConf()
-
- argv = system_conf + user_conf + custom_conf + command_line_conf
- opts, args = parser.parse_args(argv)
- if opts.verbose:
- for conf_label, conf in (
- ('System config', system_conf),
- ('User config', user_conf),
- ('Custom config', custom_conf),
- ('Command-line args', command_line_conf)):
- write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
-
- return parser, opts, args
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
deleted file mode 100644
index 3ea5183..0000000
--- a/youtube_dl/postprocessor/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from __future__ import unicode_literals
-
-from .embedthumbnail import EmbedThumbnailPP
-from .ffmpeg import (
- FFmpegPostProcessor,
- FFmpegEmbedSubtitlePP,
- FFmpegExtractAudioPP,
- FFmpegFixupStretchedPP,
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegMergerPP,
- FFmpegMetadataPP,
- FFmpegVideoConvertorPP,
- FFmpegSubtitlesConvertorPP,
-)
-from .xattrpp import XAttrMetadataPP
-from .execafterdownload import ExecAfterDownloadPP
-from .metadatafromtitle import MetadataFromTitlePP
-
-
-def get_postprocessor(key):
- return globals()[key + 'PP']
-
-
-__all__ = [
- 'EmbedThumbnailPP',
- 'ExecAfterDownloadPP',
- 'FFmpegEmbedSubtitlePP',
- 'FFmpegExtractAudioPP',
- 'FFmpegFixupM3u8PP',
- 'FFmpegFixupM4aPP',
- 'FFmpegFixupStretchedPP',
- 'FFmpegMergerPP',
- 'FFmpegMetadataPP',
- 'FFmpegPostProcessor',
- 'FFmpegSubtitlesConvertorPP',
- 'FFmpegVideoConvertorPP',
- 'MetadataFromTitlePP',
- 'XAttrMetadataPP',
-]
diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py
deleted file mode 100644
index 599dd1d..0000000
--- a/youtube_dl/postprocessor/common.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-
-from ..utils import (
- PostProcessingError,
- cli_configuration_args,
- encodeFilename,
-)
-
-
-class PostProcessor(object):
- """Post Processor class.
-
- PostProcessor objects can be added to downloaders with their
- add_post_processor() method. When the downloader has finished a
- successful download, it will take its internal chain of PostProcessors
- and start calling the run() method on each one of them, first with
- an initial argument and then with the returned value of the previous
- PostProcessor.
-
- The chain will be stopped if one of them ever returns None or the end
- of the chain is reached.
-
- PostProcessor objects follow a "mutual registration" process similar
- to InfoExtractor objects.
-
- Optionally PostProcessor can use a list of additional command-line arguments
- with self._configuration_args.
- """
-
- _downloader = None
-
- def __init__(self, downloader=None):
- self._downloader = downloader
-
- def set_downloader(self, downloader):
- """Sets the downloader for this PP."""
- self._downloader = downloader
-
- def run(self, information):
- """Run the PostProcessor.
-
- The "information" argument is a dictionary like the ones
- composed by InfoExtractors. The only difference is that this
- one has an extra field called "filepath" that points to the
- downloaded file.
-
- This method returns a tuple, the first element is a list of the files
- that can be deleted, and the second of which is the updated
- information.
-
- In addition, this method may raise a PostProcessingError
- exception if post processing fails.
- """
- return [], information # by default, keep file and do nothing
-
- def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
- try:
- os.utime(encodeFilename(path), (atime, mtime))
- except Exception:
- self._downloader.report_warning(errnote)
-
- def _configuration_args(self, default=[]):
- return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
-
-
-class AudioConversionError(PostProcessingError):
- pass
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
deleted file mode 100644
index 56be914..0000000
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-import os
-import subprocess
-
-from .ffmpeg import FFmpegPostProcessor
-
-from ..utils import (
- check_executable,
- encodeArgument,
- encodeFilename,
- PostProcessingError,
- prepend_extension,
- shell_quote
-)
-
-
-class EmbedThumbnailPPError(PostProcessingError):
- pass
-
-
-class EmbedThumbnailPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, already_have_thumbnail=False):
- super(EmbedThumbnailPP, self).__init__(downloader)
- self._already_have_thumbnail = already_have_thumbnail
-
- def run(self, info):
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- if not info.get('thumbnails'):
- self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
- return [], info
-
- thumbnail_filename = info['thumbnails'][-1]['filename']
-
- if not os.path.exists(encodeFilename(thumbnail_filename)):
- self._downloader.report_warning(
- 'Skipping embedding the thumbnail because the file is missing.')
- return [], info
-
- if info['ext'] == 'mp3':
- options = [
- '-c', 'copy', '-map', '0', '-map', '1',
- '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
-
- self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
-
- self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
-
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- elif info['ext'] in ['m4a', 'mp4']:
- if not check_executable('AtomicParsley', ['-v']):
- raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
-
- cmd = [encodeFilename('AtomicParsley', True),
- encodeFilename(filename, True),
- encodeArgument('--artwork'),
- encodeFilename(thumbnail_filename, True),
- encodeArgument('-o'),
- encodeFilename(temp_filename, True)]
-
- self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
-
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
-
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
-
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise EmbedThumbnailPPError(msg)
-
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- # for formats that don't support thumbnails (like 3gp) AtomicParsley
- # won't create to the temporary file
- if b'No changes' in stdout:
- self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
- else:
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- else:
- raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
-
- return [], info
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py
deleted file mode 100644
index 64dabe7..0000000
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import unicode_literals
-
-import subprocess
-
-from .common import PostProcessor
-from ..compat import compat_shlex_quote
-from ..utils import (
- encodeArgument,
- PostProcessingError,
-)
-
-
-class ExecAfterDownloadPP(PostProcessor):
- def __init__(self, downloader, exec_cmd):
- super(ExecAfterDownloadPP, self).__init__(downloader)
- self.exec_cmd = exec_cmd
-
- def run(self, information):
- cmd = self.exec_cmd
- if '{}' not in cmd:
- cmd += ' {}'
-
- cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
-
- self._downloader.to_screen('[exec] Executing command: %s' % cmd)
- retCode = subprocess.call(encodeArgument(cmd), shell=True)
- if retCode != 0:
- raise PostProcessingError(
- 'Command returned error code %d' % retCode)
-
- return [], information
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
deleted file mode 100644
index 757b496..0000000
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ /dev/null
@@ -1,613 +0,0 @@
-from __future__ import unicode_literals
-
-import io
-import os
-import subprocess
-import time
-import re
-
-
-from .common import AudioConversionError, PostProcessor
-
-from ..compat import (
- compat_subprocess_get_DEVNULL,
-)
-from ..utils import (
- encodeArgument,
- encodeFilename,
- get_exe_version,
- is_outdated_version,
- PostProcessingError,
- prepend_extension,
- shell_quote,
- subtitles_filename,
- dfxp2srt,
- ISO639Utils,
- replace_extension,
-)
-
-
-EXT_TO_OUT_FORMATS = {
- 'aac': 'adts',
- 'flac': 'flac',
- 'm4a': 'ipod',
- 'mka': 'matroska',
- 'mkv': 'matroska',
- 'mpg': 'mpeg',
- 'ogv': 'ogg',
- 'ts': 'mpegts',
- 'wma': 'asf',
- 'wmv': 'asf',
-}
-ACODECS = {
- 'mp3': 'libmp3lame',
- 'aac': 'aac',
- 'flac': 'flac',
- 'm4a': 'aac',
- 'opus': 'libopus',
- 'vorbis': 'libvorbis',
- 'wav': None,
-}
-
-
-class FFmpegPostProcessorError(PostProcessingError):
- pass
-
-
-class FFmpegPostProcessor(PostProcessor):
- def __init__(self, downloader=None):
- PostProcessor.__init__(self, downloader)
- self._determine_executables()
-
- def check_version(self):
- if not self.available:
- raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
-
- required_version = '10-0' if self.basename == 'avconv' else '1.0'
- if is_outdated_version(
- self._versions[self.basename], required_version):
- warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
-
- @staticmethod
- def get_versions(downloader=None):
- return FFmpegPostProcessor(downloader)._versions
-
- def _determine_executables(self):
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- prefer_ffmpeg = True
-
- self.basename = None
- self.probe_basename = None
-
- self._paths = None
- self._versions = None
- if self._downloader:
- prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
- location = self._downloader.params.get('ffmpeg_location')
- if location is not None:
- if not os.path.exists(location):
- self._downloader.report_warning(
- 'ffmpeg-location %s does not exist! '
- 'Continuing without avconv/ffmpeg.' % (location))
- self._versions = {}
- return
- elif not os.path.isdir(location):
- basename = os.path.splitext(os.path.basename(location))[0]
- if basename not in programs:
- self._downloader.report_warning(
- 'Cannot identify executable %s, its basename should be one of %s. '
- 'Continuing without avconv/ffmpeg.' %
- (location, ', '.join(programs)))
- self._versions = {}
- return None
- location = os.path.dirname(os.path.abspath(location))
- if basename in ('ffmpeg', 'ffprobe'):
- prefer_ffmpeg = True
-
- self._paths = dict(
- (p, os.path.join(location, p)) for p in programs)
- self._versions = dict(
- (p, get_exe_version(self._paths[p], args=['-version']))
- for p in programs)
- if self._versions is None:
- self._versions = dict(
- (p, get_exe_version(p, args=['-version'])) for p in programs)
- self._paths = dict((p, p) for p in programs)
-
- if prefer_ffmpeg is False:
- prefs = ('avconv', 'ffmpeg')
- else:
- prefs = ('ffmpeg', 'avconv')
- for p in prefs:
- if self._versions[p]:
- self.basename = p
- break
-
- if prefer_ffmpeg is False:
- prefs = ('avprobe', 'ffprobe')
- else:
- prefs = ('ffprobe', 'avprobe')
- for p in prefs:
- if self._versions[p]:
- self.probe_basename = p
- break
-
- @property
- def available(self):
- return self.basename is not None
-
- @property
- def executable(self):
- return self._paths[self.basename]
-
- @property
- def probe_available(self):
- return self.probe_basename is not None
-
- @property
- def probe_executable(self):
- return self._paths[self.probe_basename]
-
- def get_audio_codec(self, path):
- if not self.probe_available:
- raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
- try:
- cmd = [
- encodeFilename(self.probe_executable, True),
- encodeArgument('-show_streams'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)]
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
- handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- output = handle.communicate()[0]
- if handle.wait() != 0:
- return None
- except (IOError, OSError):
- return None
- audio_codec = None
- for line in output.decode('ascii', 'ignore').split('\n'):
- if line.startswith('codec_name='):
- audio_codec = line.split('=')[1].strip()
- elif line.strip() == 'codec_type=audio' and audio_codec is not None:
- return audio_codec
- return None
-
- def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
- self.check_version()
-
- oldest_mtime = min(
- os.stat(encodeFilename(path)).st_mtime for path in input_paths)
-
- opts += self._configuration_args()
-
- files_cmd = []
- for path in input_paths:
- files_cmd.extend([
- encodeArgument('-i'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)
- ])
- cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
- files_cmd +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
-
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate()
- if p.returncode != 0:
- stderr = stderr.decode('utf-8', 'replace')
- msg = stderr.strip().split('\n')[-1]
- raise FFmpegPostProcessorError(msg)
- self.try_utime(out_path, oldest_mtime, oldest_mtime)
-
- def run_ffmpeg(self, path, out_path, opts):
- self.run_ffmpeg_multiple_files([path], out_path, opts)
-
- def _ffmpeg_filename_argument(self, fn):
- # Always use 'file:' because the filename may contain ':' (ffmpeg
- # interprets that as a protocol) or can start with '-' (-- is broken in
- # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
- # Also leave '-' intact in order not to break streaming to stdout.
- return 'file:' + fn if fn != '-' else fn
-
-
-class FFmpegExtractAudioPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
- FFmpegPostProcessor.__init__(self, downloader)
- if preferredcodec is None:
- preferredcodec = 'best'
- self._preferredcodec = preferredcodec
- self._preferredquality = preferredquality
- self._nopostoverwrites = nopostoverwrites
-
- def run_ffmpeg(self, path, out_path, codec, more_opts):
- if codec is None:
- acodec_opts = []
- else:
- acodec_opts = ['-acodec', codec]
- opts = ['-vn'] + acodec_opts + more_opts
- try:
- FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
- except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.msg)
-
- def run(self, information):
- path = information['filepath']
-
- filecodec = self.get_audio_codec(path)
- if filecodec is None:
- raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
-
- more_opts = []
- if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
- if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
- # Lossless, but in another container
- acodec = 'copy'
- extension = 'm4a'
- more_opts = ['-bsf:a', 'aac_adtstoasc']
- elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
- # Lossless if possible
- acodec = 'copy'
- extension = filecodec
- if filecodec == 'aac':
- more_opts = ['-f', 'adts']
- if filecodec == 'vorbis':
- extension = 'ogg'
- else:
- # MP3 otherwise.
- acodec = 'libmp3lame'
- extension = 'mp3'
- more_opts = []
- if self._preferredquality is not None:
- if int(self._preferredquality) < 10:
- more_opts += ['-q:a', self._preferredquality]
- else:
- more_opts += ['-b:a', self._preferredquality + 'k']
- else:
- # We convert the audio (lossy if codec is lossy)
- acodec = ACODECS[self._preferredcodec]
- extension = self._preferredcodec
- more_opts = []
- if self._preferredquality is not None:
- # The opus codec doesn't support the -aq option
- if int(self._preferredquality) < 10 and extension != 'opus':
- more_opts += ['-q:a', self._preferredquality]
- else:
- more_opts += ['-b:a', self._preferredquality + 'k']
- if self._preferredcodec == 'aac':
- more_opts += ['-f', 'adts']
- if self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a', 'aac_adtstoasc']
- if self._preferredcodec == 'vorbis':
- extension = 'ogg'
- if self._preferredcodec == 'wav':
- extension = 'wav'
- more_opts += ['-f', 'wav']
-
- prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
- new_path = prefix + sep + extension
-
- information['filepath'] = new_path
- information['ext'] = extension
-
- # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
- if (new_path == path or
- (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
- self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
- return [], information
-
- try:
- self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
- self.run_ffmpeg(path, new_path, acodec, more_opts)
- except AudioConversionError as e:
- raise PostProcessingError(
- 'audio conversion failed: ' + e.msg)
- except Exception:
- raise PostProcessingError('error running ' + self.basename)
-
- # Try to update the date time for extracted audio file.
- if information.get('filetime') is not None:
- self.try_utime(
- new_path, time.time(), information['filetime'],
- errnote='Cannot update utime of audio file')
-
- return [path], information
-
-
-class FFmpegVideoConvertorPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, preferedformat=None):
- super(FFmpegVideoConvertorPP, self).__init__(downloader)
- self._preferedformat = preferedformat
-
- def run(self, information):
- path = information['filepath']
- if information['ext'] == self._preferedformat:
- self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
- return [], information
- options = []
- if self._preferedformat == 'avi':
- options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
- prefix, sep, ext = path.rpartition('.')
- outpath = prefix + sep + self._preferedformat
- self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
- self.run_ffmpeg(path, outpath, options)
- information['filepath'] = outpath
- information['format'] = self._preferedformat
- information['ext'] = self._preferedformat
- return [path], information
-
-
-class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
- def run(self, information):
- if information['ext'] not in ('mp4', 'webm', 'mkv'):
- self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
- return [], information
- subtitles = information.get('requested_subtitles')
- if not subtitles:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
- return [], information
-
- filename = information['filepath']
-
- ext = information['ext']
- sub_langs = []
- sub_filenames = []
- webm_vtt_warn = False
-
- for lang, sub_info in subtitles.items():
- sub_ext = sub_info['ext']
- if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
- sub_langs.append(lang)
- sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
- else:
- if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
- webm_vtt_warn = True
- self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
-
- if not sub_langs:
- return [], information
-
- input_files = [filename] + sub_filenames
-
- opts = [
- '-map', '0',
- '-c', 'copy',
- # Don't copy the existing subtitles, we may be running the
- # postprocessor a second time
- '-map', '-0:s',
- ]
- if information['ext'] == 'mp4':
- opts += ['-c:s', 'mov_text']
- for (i, lang) in enumerate(sub_langs):
- opts.extend(['-map', '%d:0' % (i + 1)])
- lang_code = ISO639Utils.short2long(lang)
- if lang_code is not None:
- opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-
- temp_filename = prepend_extension(filename, 'temp')
- self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
- self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return sub_filenames, information
-
-
-class FFmpegMetadataPP(FFmpegPostProcessor):
- def run(self, info):
- metadata = {}
-
- def add(meta_list, info_list=None):
- if not info_list:
- info_list = meta_list
- if not isinstance(meta_list, (list, tuple)):
- meta_list = (meta_list,)
- if not isinstance(info_list, (list, tuple)):
- info_list = (info_list,)
- for info_f in info_list:
- if info.get(info_f) is not None:
- for meta_f in meta_list:
- metadata[meta_f] = info[info_f]
- break
-
- add('title', ('track', 'title'))
- add('date', 'upload_date')
- add(('description', 'comment'), 'description')
- add('purl', 'webpage_url')
- add('track', 'track_number')
- add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
- add('genre')
- add('album')
- add('album_artist')
- add('disc', 'disc_number')
-
- if not metadata:
- self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- in_filenames = [filename]
- options = []
-
- if info['ext'] == 'm4a':
- options.extend(['-vn', '-acodec', 'copy'])
- else:
- options.extend(['-c', 'copy'])
-
- for (name, value) in metadata.items():
- options.extend(['-metadata', '%s=%s' % (name, value)])
-
- chapters = info.get('chapters', [])
- if chapters:
- metadata_filename = replace_extension(filename, 'meta')
- with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
- def ffmpeg_escape(text):
- return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
-
- metadata_file_content = ';FFMETADATA1\n'
- for chapter in chapters:
- metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
- metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
- metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
- chapter_title = chapter.get('title')
- if chapter_title:
- metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
- f.write(metadata_file_content)
- in_filenames.append(metadata_filename)
- options.extend(['-map_metadata', '1'])
-
- self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
- self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
- if chapters:
- os.remove(metadata_filename)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
-
-
-class FFmpegMergerPP(FFmpegPostProcessor):
- def run(self, info):
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
- self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
- self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return info['__files_to_merge'], info
-
- def can_merge(self):
- # TODO: figure out merge-capable ffmpeg version
- if self.basename != 'avconv':
- return True
-
- required_version = '10-0'
- if is_outdated_version(
- self._versions[self.basename], required_version):
- warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
- 'youtube-dl will download single file media. '
- 'Update %s to version %s or newer to fix this.') % (
- self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
- return False
- return True
-
-
-class FFmpegFixupStretchedPP(FFmpegPostProcessor):
- def run(self, info):
- stretched_ratio = info.get('stretched_ratio')
- if stretched_ratio is None or stretched_ratio == 1:
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
- self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return [], info
-
-
-class FFmpegFixupM4aPP(FFmpegPostProcessor):
- def run(self, info):
- if info.get('container') != 'm4a_dash':
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-f', 'mp4']
- self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return [], info
-
-
-class FFmpegFixupM3u8PP(FFmpegPostProcessor):
- def run(self, info):
- filename = info['filepath']
- if self.get_audio_codec(filename) == 'aac':
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
- self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
-
-
-class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, format=None):
- super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
- self.format = format
-
- def run(self, info):
- subs = info.get('requested_subtitles')
- filename = info['filepath']
- new_ext = self.format
- new_format = new_ext
- if new_format == 'vtt':
- new_format = 'webvtt'
- if subs is None:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
- return [], info
- self._downloader.to_screen('[ffmpeg] Converting subtitles')
- sub_filenames = []
- for lang, sub in subs.items():
- ext = sub['ext']
- if ext == new_ext:
- self._downloader.to_screen(
- '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
- continue
- old_file = subtitles_filename(filename, lang, ext)
- sub_filenames.append(old_file)
- new_file = subtitles_filename(filename, lang, new_ext)
-
- if ext in ('dfxp', 'ttml', 'tt'):
- self._downloader.report_warning(
- 'You have requested to convert dfxp (TTML) subtitles into another format, '
- 'which results in style information loss')
-
- dfxp_file = old_file
- srt_file = subtitles_filename(filename, lang, 'srt')
-
- with open(dfxp_file, 'rb') as f:
- srt_data = dfxp2srt(f.read())
-
- with io.open(srt_file, 'wt', encoding='utf-8') as f:
- f.write(srt_data)
- old_file = srt_file
-
- subs[lang] = {
- 'ext': 'srt',
- 'data': srt_data
- }
-
- if new_ext == 'srt':
- continue
- else:
- sub_filenames.append(srt_file)
-
- self.run_ffmpeg(old_file, new_file, ['-f', new_format])
-
- with io.open(new_file, 'rt', encoding='utf-8') as f:
- subs[lang] = {
- 'ext': new_ext,
- 'data': f.read(),
- }
-
- return sub_filenames, info
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
deleted file mode 100644
index f5c14d9..0000000
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import PostProcessor
-
-
-class MetadataFromTitlePP(PostProcessor):
- def __init__(self, downloader, titleformat):
- super(MetadataFromTitlePP, self).__init__(downloader)
- self._titleformat = titleformat
- self._titleregex = (self.format_to_regex(titleformat)
- if re.search(r'%\(\w+\)s', titleformat)
- else titleformat)
-
- def format_to_regex(self, fmt):
- r"""
- Converts a string like
- '%(title)s - %(artist)s'
- to a regex like
- '(?P<title>.+)\ \-\ (?P<artist>.+)'
- """
- lastpos = 0
- regex = ''
- # replace %(..)s with regex group and escape other string parts
- for match in re.finditer(r'%\((\w+)\)s', fmt):
- regex += re.escape(fmt[lastpos:match.start()])
- regex += r'(?P<' + match.group(1) + '>.+)'
- lastpos = match.end()
- if lastpos < len(fmt):
- regex += re.escape(fmt[lastpos:])
- return regex
-
- def run(self, info):
- title = info['title']
- match = re.match(self._titleregex, title)
- if match is None:
- self._downloader.to_screen(
- '[fromtitle] Could not interpret title of video as "%s"'
- % self._titleformat)
- return [], info
- for attribute, value in match.groupdict().items():
- info[attribute] = value
- self._downloader.to_screen(
- '[fromtitle] parsed %s: %s'
- % (attribute, value if value is not None else 'NA'))
-
- return [], info
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
deleted file mode 100644
index b0aed9c..0000000
--- a/youtube_dl/postprocessor/xattrpp.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import PostProcessor
-from ..compat import compat_os_name
-from ..utils import (
- hyphenate_date,
- write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
-)
-
-
-class XAttrMetadataPP(PostProcessor):
-
- #
- # More info about extended attributes for media:
- # http://freedesktop.org/wiki/CommonExtendedAttributes/
- # http://www.freedesktop.org/wiki/PhreedomDraft/
- # http://dublincore.org/documents/usageguide/elements.shtml
- #
- # TODO:
- # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
- # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
- #
-
- def run(self, info):
- """ Set extended attributes on downloaded file (if xattr support is found). """
-
- # Write the metadata to the file's xattrs
- self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
-
- filename = info['filepath']
-
- try:
- xattr_mapping = {
- 'user.xdg.referrer.url': 'webpage_url',
- # 'user.xdg.comment': 'description',
- 'user.dublincore.title': 'title',
- 'user.dublincore.date': 'upload_date',
- 'user.dublincore.description': 'description',
- 'user.dublincore.contributor': 'uploader',
- 'user.dublincore.format': 'format',
- }
-
- num_written = 0
- for xattrname, infoname in xattr_mapping.items():
-
- value = info.get(infoname)
-
- if value:
- if infoname == 'upload_date':
- value = hyphenate_date(value)
-
- byte_value = value.encode('utf-8')
- write_xattr(filename, xattrname, byte_value)
- num_written += 1
-
- return [], info
-
- except XAttrUnavailableError as e:
- self._downloader.report_error(str(e))
- return [], info
-
- except XAttrMetadataError as e:
- if e.reason == 'NO_SPACE':
- self._downloader.report_warning(
- 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
- (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
- elif e.reason == 'VALUE_TOO_LONG':
- self._downloader.report_warning(
- 'Unable to write extended attributes due to too long values.')
- else:
- msg = 'This filesystem doesn\'t support extended attributes. '
- if compat_os_name == 'nt':
- msg += 'You need to use NTFS.'
- else:
- msg += '(You may have to enable them in your /etc/fstab)'
- self._downloader.report_error(msg)
- return [], info
diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py
deleted file mode 100644
index 5d4adbe..0000000
--- a/youtube_dl/socks.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# Public Domain SOCKS proxy protocol implementation
-# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
-
-from __future__ import unicode_literals
-
-# References:
-# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
-# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
-# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
-# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
-
-import collections
-import socket
-
-from .compat import (
- compat_ord,
- compat_struct_pack,
- compat_struct_unpack,
-)
-
-__author__ = 'Timo Schmid <coding@timoschmid.de>'
-
-SOCKS4_VERSION = 4
-SOCKS4_REPLY_VERSION = 0x00
-# Excerpt from SOCKS4A protocol:
-# if the client cannot resolve the destination host's domain name to find its
-# IP address, it should set the first three bytes of DSTIP to NULL and the last
-# byte to a non-zero value.
-SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
-
-SOCKS5_VERSION = 5
-SOCKS5_USER_AUTH_VERSION = 0x01
-SOCKS5_USER_AUTH_SUCCESS = 0x00
-
-
-class Socks4Command(object):
- CMD_CONNECT = 0x01
- CMD_BIND = 0x02
-
-
-class Socks5Command(Socks4Command):
- CMD_UDP_ASSOCIATE = 0x03
-
-
-class Socks5Auth(object):
- AUTH_NONE = 0x00
- AUTH_GSSAPI = 0x01
- AUTH_USER_PASS = 0x02
- AUTH_NO_ACCEPTABLE = 0xFF # For server response
-
-
-class Socks5AddressType(object):
- ATYP_IPV4 = 0x01
- ATYP_DOMAINNAME = 0x03
- ATYP_IPV6 = 0x04
-
-
-class ProxyError(socket.error):
- ERR_SUCCESS = 0x00
-
- def __init__(self, code=None, msg=None):
- if code is not None and msg is None:
- msg = self.CODES.get(code) or 'unknown error'
- super(ProxyError, self).__init__(code, msg)
-
-
-class InvalidVersionError(ProxyError):
- def __init__(self, expected_version, got_version):
- msg = ('Invalid response version from server. Expected {0:02x} got '
- '{1:02x}'.format(expected_version, got_version))
- super(InvalidVersionError, self).__init__(0, msg)
-
-
-class Socks4Error(ProxyError):
- ERR_SUCCESS = 90
-
- CODES = {
- 91: 'request rejected or failed',
- 92: 'request rejected because SOCKS server cannot connect to identd on the client',
- 93: 'request rejected because the client program and identd report different user-ids'
- }
-
-
-class Socks5Error(ProxyError):
- ERR_GENERAL_FAILURE = 0x01
-
- CODES = {
- 0x01: 'general SOCKS server failure',
- 0x02: 'connection not allowed by ruleset',
- 0x03: 'Network unreachable',
- 0x04: 'Host unreachable',
- 0x05: 'Connection refused',
- 0x06: 'TTL expired',
- 0x07: 'Command not supported',
- 0x08: 'Address type not supported',
- 0xFE: 'unknown username or invalid password',
- 0xFF: 'all offered authentication methods were rejected'
- }
-
-
-class ProxyType(object):
- SOCKS4 = 0
- SOCKS4A = 1
- SOCKS5 = 2
-
-
-Proxy = collections.namedtuple('Proxy', (
- 'type', 'host', 'port', 'username', 'password', 'remote_dns'))
-
-
-class sockssocket(socket.socket):
- def __init__(self, *args, **kwargs):
- self._proxy = None
- super(sockssocket, self).__init__(*args, **kwargs)
-
- def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
- assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
-
- self._proxy = Proxy(proxytype, addr, port, username, password, rdns)
-
- def recvall(self, cnt):
- data = b''
- while len(data) < cnt:
- cur = self.recv(cnt - len(data))
- if not cur:
- raise EOFError('{0} bytes missing'.format(cnt - len(data)))
- data += cur
- return data
-
- def _recv_bytes(self, cnt):
- data = self.recvall(cnt)
- return compat_struct_unpack('!{0}B'.format(cnt), data)
-
- @staticmethod
- def _len_and_data(data):
- return compat_struct_pack('!B', len(data)) + data
-
- def _check_response_version(self, expected_version, got_version):
- if got_version != expected_version:
- self.close()
- raise InvalidVersionError(expected_version, got_version)
-
- def _resolve_address(self, destaddr, default, use_remote_dns):
- try:
- return socket.inet_aton(destaddr)
- except socket.error:
- if use_remote_dns and self._proxy.remote_dns:
- return default
- else:
- return socket.inet_aton(socket.gethostbyname(destaddr))
-
- def _setup_socks4(self, address, is_4a=False):
- destaddr, port = address
-
- ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
-
- packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
-
- username = (self._proxy.username or '').encode('utf-8')
- packet += username + b'\x00'
-
- if is_4a and self._proxy.remote_dns:
- packet += destaddr.encode('utf-8') + b'\x00'
-
- self.sendall(packet)
-
- version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
-
- self._check_response_version(SOCKS4_REPLY_VERSION, version)
-
- if resp_code != Socks4Error.ERR_SUCCESS:
- self.close()
- raise Socks4Error(resp_code)
-
- return (dsthost, dstport)
-
- def _setup_socks4a(self, address):
- self._setup_socks4(address, is_4a=True)
-
- def _socks5_auth(self):
- packet = compat_struct_pack('!B', SOCKS5_VERSION)
-
- auth_methods = [Socks5Auth.AUTH_NONE]
- if self._proxy.username and self._proxy.password:
- auth_methods.append(Socks5Auth.AUTH_USER_PASS)
-
- packet += compat_struct_pack('!B', len(auth_methods))
- packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
-
- self.sendall(packet)
-
- version, method = self._recv_bytes(2)
-
- self._check_response_version(SOCKS5_VERSION, version)
-
- if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
- method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
- self.close()
- raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
-
- if method == Socks5Auth.AUTH_USER_PASS:
- username = self._proxy.username.encode('utf-8')
- password = self._proxy.password.encode('utf-8')
- packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
- packet += self._len_and_data(username) + self._len_and_data(password)
- self.sendall(packet)
-
- version, status = self._recv_bytes(2)
-
- self._check_response_version(SOCKS5_USER_AUTH_VERSION, version)
-
- if status != SOCKS5_USER_AUTH_SUCCESS:
- self.close()
- raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE)
-
- def _setup_socks5(self, address):
- destaddr, port = address
-
- ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
-
- self._socks5_auth()
-
- reserved = 0
- packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
- if ipaddr is None:
- destaddr = destaddr.encode('utf-8')
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
- packet += self._len_and_data(destaddr)
- else:
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
- packet += compat_struct_pack('!H', port)
-
- self.sendall(packet)
-
- version, status, reserved, atype = self._recv_bytes(4)
-
- self._check_response_version(SOCKS5_VERSION, version)
-
- if status != Socks5Error.ERR_SUCCESS:
- self.close()
- raise Socks5Error(status)
-
- if atype == Socks5AddressType.ATYP_IPV4:
- destaddr = self.recvall(4)
- elif atype == Socks5AddressType.ATYP_DOMAINNAME:
- alen = compat_ord(self.recv(1))
- destaddr = self.recvall(alen)
- elif atype == Socks5AddressType.ATYP_IPV6:
- destaddr = self.recvall(16)
- destport = compat_struct_unpack('!H', self.recvall(2))[0]
-
- return (destaddr, destport)
-
- def _make_proxy(self, connect_func, address):
- if not self._proxy:
- return connect_func(self, address)
-
- result = connect_func(self, (self._proxy.host, self._proxy.port))
- if result != 0 and result is not None:
- return result
- setup_funcs = {
- ProxyType.SOCKS4: self._setup_socks4,
- ProxyType.SOCKS4A: self._setup_socks4a,
- ProxyType.SOCKS5: self._setup_socks5,
- }
- setup_funcs[self._proxy.type](address)
- return result
-
- def connect(self, address):
- self._make_proxy(socket.socket.connect, address)
-
- def connect_ex(self, address):
- return self._make_proxy(socket.socket.connect_ex, address)
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
deleted file mode 100644
index 0c71585..0000000
--- a/youtube_dl/swfinterp.py
+++ /dev/null
@@ -1,834 +0,0 @@
-from __future__ import unicode_literals
-
-import collections
-import io
-import zlib
-
-from .compat import (
- compat_str,
- compat_struct_unpack,
-)
-from .utils import (
- ExtractorError,
-)
-
-
-def _extract_tags(file_contents):
- if file_contents[1:3] != b'WS':
- raise ExtractorError(
- 'Not an SWF file; header is %r' % file_contents[:3])
- if file_contents[:1] == b'C':
- content = zlib.decompress(file_contents[8:])
- else:
- raise NotImplementedError(
- 'Unsupported compression format %r' %
- file_contents[:1])
-
- # Determine number of bits in framesize rectangle
- framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
- framesize_len = (5 + 4 * framesize_nbits + 7) // 8
-
- pos = framesize_len + 2 + 2
- while pos < len(content):
- header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
- pos += 2
- tag_code = header16 >> 6
- tag_len = header16 & 0x3f
- if tag_len == 0x3f:
- tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
- pos += 4
- assert pos + tag_len <= len(content), \
- ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
- % (tag_code, pos, tag_len, len(content)))
- yield (tag_code, content[pos:pos + tag_len])
- pos += tag_len
-
-
-class _AVMClass_Object(object):
- def __init__(self, avm_class):
- self.avm_class = avm_class
-
- def __repr__(self):
- return '%s#%x' % (self.avm_class.name, id(self))
-
-
-class _ScopeDict(dict):
- def __init__(self, avm_class):
- super(_ScopeDict, self).__init__()
- self.avm_class = avm_class
-
- def __repr__(self):
- return '%s__Scope(%s)' % (
- self.avm_class.name,
- super(_ScopeDict, self).__repr__())
-
-
-class _AVMClass(object):
- def __init__(self, name_idx, name, static_properties=None):
- self.name_idx = name_idx
- self.name = name
- self.method_names = {}
- self.method_idxs = {}
- self.methods = {}
- self.method_pyfunctions = {}
- self.static_properties = static_properties if static_properties else {}
-
- self.variables = _ScopeDict(self)
- self.constants = {}
-
- def make_object(self):
- return _AVMClass_Object(self)
-
- def __repr__(self):
- return '_AVMClass(%s)' % (self.name)
-
- def register_methods(self, methods):
- self.method_names.update(methods.items())
- self.method_idxs.update(dict(
- (idx, name)
- for name, idx in methods.items()))
-
-
-class _Multiname(object):
- def __init__(self, kind):
- self.kind = kind
-
- def __repr__(self):
- return '[MULTINAME kind: 0x%x]' % self.kind
-
-
-def _read_int(reader):
- res = 0
- shift = 0
- for _ in range(5):
- buf = reader.read(1)
- assert len(buf) == 1
- b = compat_struct_unpack('<B', buf)[0]
- res = res | ((b & 0x7f) << shift)
- if b & 0x80 == 0:
- break
- shift += 7
- return res
-
-
-def _u30(reader):
- res = _read_int(reader)
- assert res & 0xf0000000 == 0
- return res
-
-
-_u32 = _read_int
-
-
-def _s32(reader):
- v = _read_int(reader)
- if v & 0x80000000 != 0:
- v = - ((v ^ 0xffffffff) + 1)
- return v
-
-
-def _s24(reader):
- bs = reader.read(3)
- assert len(bs) == 3
- last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
- return compat_struct_unpack('<i', bs + last_byte)[0]
-
-
-def _read_string(reader):
- slen = _u30(reader)
- resb = reader.read(slen)
- assert len(resb) == slen
- return resb.decode('utf-8')
-
-
-def _read_bytes(count, reader):
- assert count >= 0
- resb = reader.read(count)
- assert len(resb) == count
- return resb
-
-
-def _read_byte(reader):
- resb = _read_bytes(1, reader=reader)
- res = compat_struct_unpack('<B', resb)[0]
- return res
-
-
-StringClass = _AVMClass('(no name idx)', 'String')
-ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray')
-TimerClass = _AVMClass('(no name idx)', 'Timer')
-TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
-_builtin_classes = {
- StringClass.name: StringClass,
- ByteArrayClass.name: ByteArrayClass,
- TimerClass.name: TimerClass,
- TimerEventClass.name: TimerEventClass,
-}
-
-
-class _Undefined(object):
- def __bool__(self):
- return False
- __nonzero__ = __bool__
-
- def __hash__(self):
- return 0
-
- def __str__(self):
- return 'undefined'
- __repr__ = __str__
-
-
-undefined = _Undefined()
-
-
-class SWFInterpreter(object):
- def __init__(self, file_contents):
- self._patched_functions = {
- (TimerClass, 'addEventListener'): lambda params: undefined,
- }
- code_tag = next(tag
- for tag_code, tag in _extract_tags(file_contents)
- if tag_code == 82)
- p = code_tag.index(b'\0', 4) + 1
- code_reader = io.BytesIO(code_tag[p:])
-
- # Parse ABC (AVM2 ByteCode)
-
- # Define a couple convenience methods
- u30 = lambda *args: _u30(*args, reader=code_reader)
- s32 = lambda *args: _s32(*args, reader=code_reader)
- u32 = lambda *args: _u32(*args, reader=code_reader)
- read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
- read_byte = lambda *args: _read_byte(*args, reader=code_reader)
-
- # minor_version + major_version
- read_bytes(2 + 2)
-
- # Constant pool
- int_count = u30()
- self.constant_ints = [0]
- for _c in range(1, int_count):
- self.constant_ints.append(s32())
- self.constant_uints = [0]
- uint_count = u30()
- for _c in range(1, uint_count):
- self.constant_uints.append(u32())
- double_count = u30()
- read_bytes(max(0, (double_count - 1)) * 8)
- string_count = u30()
- self.constant_strings = ['']
- for _c in range(1, string_count):
- s = _read_string(code_reader)
- self.constant_strings.append(s)
- namespace_count = u30()
- for _c in range(1, namespace_count):
- read_bytes(1) # kind
- u30() # name
- ns_set_count = u30()
- for _c in range(1, ns_set_count):
- count = u30()
- for _c2 in range(count):
- u30()
- multiname_count = u30()
- MULTINAME_SIZES = {
- 0x07: 2, # QName
- 0x0d: 2, # QNameA
- 0x0f: 1, # RTQName
- 0x10: 1, # RTQNameA
- 0x11: 0, # RTQNameL
- 0x12: 0, # RTQNameLA
- 0x09: 2, # Multiname
- 0x0e: 2, # MultinameA
- 0x1b: 1, # MultinameL
- 0x1c: 1, # MultinameLA
- }
- self.multinames = ['']
- for _c in range(1, multiname_count):
- kind = u30()
- assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
- if kind == 0x07:
- u30() # namespace_idx
- name_idx = u30()
- self.multinames.append(self.constant_strings[name_idx])
- elif kind == 0x09:
- name_idx = u30()
- u30()
- self.multinames.append(self.constant_strings[name_idx])
- else:
- self.multinames.append(_Multiname(kind))
- for _c2 in range(MULTINAME_SIZES[kind]):
- u30()
-
- # Methods
- method_count = u30()
- MethodInfo = collections.namedtuple(
- 'MethodInfo',
- ['NEED_ARGUMENTS', 'NEED_REST'])
- method_infos = []
- for method_id in range(method_count):
- param_count = u30()
- u30() # return type
- for _ in range(param_count):
- u30() # param type
- u30() # name index (always 0 for youtube)
- flags = read_byte()
- if flags & 0x08 != 0:
- # Options present
- option_count = u30()
- for c in range(option_count):
- u30() # val
- read_bytes(1) # kind
- if flags & 0x80 != 0:
- # Param names present
- for _ in range(param_count):
- u30() # param name
- mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
- method_infos.append(mi)
-
- # Metadata
- metadata_count = u30()
- for _c in range(metadata_count):
- u30() # name
- item_count = u30()
- for _c2 in range(item_count):
- u30() # key
- u30() # value
-
- def parse_traits_info():
- trait_name_idx = u30()
- kind_full = read_byte()
- kind = kind_full & 0x0f
- attrs = kind_full >> 4
- methods = {}
- constants = None
- if kind == 0x00: # Slot
- u30() # Slot id
- u30() # type_name_idx
- vindex = u30()
- if vindex != 0:
- read_byte() # vkind
- elif kind == 0x06: # Const
- u30() # Slot id
- u30() # type_name_idx
- vindex = u30()
- vkind = 'any'
- if vindex != 0:
- vkind = read_byte()
- if vkind == 0x03: # Constant_Int
- value = self.constant_ints[vindex]
- elif vkind == 0x04: # Constant_UInt
- value = self.constant_uints[vindex]
- else:
- return {}, None # Ignore silently for now
- constants = {self.multinames[trait_name_idx]: value}
- elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter
- u30() # disp_id
- method_idx = u30()
- methods[self.multinames[trait_name_idx]] = method_idx
- elif kind == 0x04: # Class
- u30() # slot_id
- u30() # classi
- elif kind == 0x05: # Function
- u30() # slot_id
- function_idx = u30()
- methods[function_idx] = self.multinames[trait_name_idx]
- else:
- raise ExtractorError('Unsupported trait kind %d' % kind)
-
- if attrs & 0x4 != 0: # Metadata present
- metadata_count = u30()
- for _c3 in range(metadata_count):
- u30() # metadata index
-
- return methods, constants
-
- # Classes
- class_count = u30()
- classes = []
- for class_id in range(class_count):
- name_idx = u30()
-
- cname = self.multinames[name_idx]
- avm_class = _AVMClass(name_idx, cname)
- classes.append(avm_class)
-
- u30() # super_name idx
- flags = read_byte()
- if flags & 0x08 != 0: # Protected namespace is present
- u30() # protected_ns_idx
- intrf_count = u30()
- for _c2 in range(intrf_count):
- u30()
- u30() # iinit
- trait_count = u30()
- for _c2 in range(trait_count):
- trait_methods, trait_constants = parse_traits_info()
- avm_class.register_methods(trait_methods)
- if trait_constants:
- avm_class.constants.update(trait_constants)
-
- assert len(classes) == class_count
- self._classes_by_name = dict((c.name, c) for c in classes)
-
- for avm_class in classes:
- avm_class.cinit_idx = u30()
- trait_count = u30()
- for _c2 in range(trait_count):
- trait_methods, trait_constants = parse_traits_info()
- avm_class.register_methods(trait_methods)
- if trait_constants:
- avm_class.constants.update(trait_constants)
-
- # Scripts
- script_count = u30()
- for _c in range(script_count):
- u30() # init
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- # Method bodies
- method_body_count = u30()
- Method = collections.namedtuple('Method', ['code', 'local_count'])
- self._all_methods = []
- for _c in range(method_body_count):
- method_idx = u30()
- u30() # max_stack
- local_count = u30()
- u30() # init_scope_depth
- u30() # max_scope_depth
- code_length = u30()
- code = read_bytes(code_length)
- m = Method(code, local_count)
- self._all_methods.append(m)
- for avm_class in classes:
- if method_idx in avm_class.method_idxs:
- avm_class.methods[avm_class.method_idxs[method_idx]] = m
- exception_count = u30()
- for _c2 in range(exception_count):
- u30() # from
- u30() # to
- u30() # target
- u30() # exc_type
- u30() # var_name
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- assert p + code_reader.tell() == len(code_tag)
-
- def patch_function(self, avm_class, func_name, f):
- self._patched_functions[(avm_class, func_name)] = f
-
- def extract_class(self, class_name, call_cinit=True):
- try:
- res = self._classes_by_name[class_name]
- except KeyError:
- raise ExtractorError('Class %r not found' % class_name)
-
- if call_cinit and hasattr(res, 'cinit_idx'):
- res.register_methods({'$cinit': res.cinit_idx})
- res.methods['$cinit'] = self._all_methods[res.cinit_idx]
- cinit = self.extract_function(res, '$cinit')
- cinit([])
-
- return res
-
- def extract_function(self, avm_class, func_name):
- p = self._patched_functions.get((avm_class, func_name))
- if p:
- return p
- if func_name in avm_class.method_pyfunctions:
- return avm_class.method_pyfunctions[func_name]
- if func_name in self._classes_by_name:
- return self._classes_by_name[func_name].make_object()
- if func_name not in avm_class.methods:
- raise ExtractorError('Cannot find function %s.%s' % (
- avm_class.name, func_name))
- m = avm_class.methods[func_name]
-
- def resfunc(args):
- # Helper functions
- coder = io.BytesIO(m.code)
- s24 = lambda: _s24(coder)
- u30 = lambda: _u30(coder)
-
- registers = [avm_class.variables] + list(args) + [None] * m.local_count
- stack = []
- scopes = collections.deque([
- self._classes_by_name, avm_class.constants, avm_class.variables])
- while True:
- opcode = _read_byte(coder)
- if opcode == 9: # label
- pass # Spec says: "Do nothing."
- elif opcode == 16: # jump
- offset = s24()
- coder.seek(coder.tell() + offset)
- elif opcode == 17: # iftrue
- offset = s24()
- value = stack.pop()
- if value:
- coder.seek(coder.tell() + offset)
- elif opcode == 18: # iffalse
- offset = s24()
- value = stack.pop()
- if not value:
- coder.seek(coder.tell() + offset)
- elif opcode == 19: # ifeq
- offset = s24()
- value2 = stack.pop()
- value1 = stack.pop()
- if value2 == value1:
- coder.seek(coder.tell() + offset)
- elif opcode == 20: # ifne
- offset = s24()
- value2 = stack.pop()
- value1 = stack.pop()
- if value2 != value1:
- coder.seek(coder.tell() + offset)
- elif opcode == 21: # iflt
- offset = s24()
- value2 = stack.pop()
- value1 = stack.pop()
- if value1 < value2:
- coder.seek(coder.tell() + offset)
- elif opcode == 32: # pushnull
- stack.append(None)
- elif opcode == 33: # pushundefined
- stack.append(undefined)
- elif opcode == 36: # pushbyte
- v = _read_byte(coder)
- stack.append(v)
- elif opcode == 37: # pushshort
- v = u30()
- stack.append(v)
- elif opcode == 38: # pushtrue
- stack.append(True)
- elif opcode == 39: # pushfalse
- stack.append(False)
- elif opcode == 40: # pushnan
- stack.append(float('NaN'))
- elif opcode == 42: # dup
- value = stack[-1]
- stack.append(value)
- elif opcode == 44: # pushstring
- idx = u30()
- stack.append(self.constant_strings[idx])
- elif opcode == 48: # pushscope
- new_scope = stack.pop()
- scopes.append(new_scope)
- elif opcode == 66: # construct
- arg_count = u30()
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- res = obj.avm_class.make_object()
- stack.append(res)
- elif opcode == 70: # callproperty
- index = u30()
- mname = self.multinames[index]
- arg_count = u30()
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
-
- if obj == StringClass:
- if mname == 'String':
- assert len(args) == 1
- assert isinstance(args[0], (
- int, compat_str, _Undefined))
- if args[0] == undefined:
- res = 'undefined'
- else:
- res = compat_str(args[0])
- stack.append(res)
- continue
- else:
- raise NotImplementedError(
- 'Function String.%s is not yet implemented'
- % mname)
- elif isinstance(obj, _AVMClass_Object):
- func = self.extract_function(obj.avm_class, mname)
- res = func(args)
- stack.append(res)
- continue
- elif isinstance(obj, _AVMClass):
- func = self.extract_function(obj, mname)
- res = func(args)
- stack.append(res)
- continue
- elif isinstance(obj, _ScopeDict):
- if mname in obj.avm_class.method_names:
- func = self.extract_function(obj.avm_class, mname)
- res = func(args)
- else:
- res = obj[mname]
- stack.append(res)
- continue
- elif isinstance(obj, compat_str):
- if mname == 'split':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- if args[0] == '':
- res = list(obj)
- else:
- res = obj.split(args[0])
- stack.append(res)
- continue
- elif mname == 'charCodeAt':
- assert len(args) <= 1
- idx = 0 if len(args) == 0 else args[0]
- assert isinstance(idx, int)
- res = ord(obj[idx])
- stack.append(res)
- continue
- elif isinstance(obj, list):
- if mname == 'slice':
- assert len(args) == 1
- assert isinstance(args[0], int)
- res = obj[args[0]:]
- stack.append(res)
- continue
- elif mname == 'join':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- res = args[0].join(obj)
- stack.append(res)
- continue
- raise NotImplementedError(
- 'Unsupported property %r on %r'
- % (mname, obj))
- elif opcode == 71: # returnvoid
- res = undefined
- return res
- elif opcode == 72: # returnvalue
- res = stack.pop()
- return res
- elif opcode == 73: # constructsuper
- # Not yet implemented, just hope it works without it
- arg_count = u30()
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- elif opcode == 74: # constructproperty
- index = u30()
- arg_count = u30()
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
-
- mname = self.multinames[index]
- assert isinstance(obj, _AVMClass)
-
- # We do not actually call the constructor for now;
- # we just pretend it does nothing
- stack.append(obj.make_object())
- elif opcode == 79: # callpropvoid
- index = u30()
- mname = self.multinames[index]
- arg_count = u30()
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- if isinstance(obj, _AVMClass_Object):
- func = self.extract_function(obj.avm_class, mname)
- res = func(args)
- assert res is undefined
- continue
- if isinstance(obj, _ScopeDict):
- assert mname in obj.avm_class.method_names
- func = self.extract_function(obj.avm_class, mname)
- res = func(args)
- assert res is undefined
- continue
- if mname == 'reverse':
- assert isinstance(obj, list)
- obj.reverse()
- else:
- raise NotImplementedError(
- 'Unsupported (void) property %r on %r'
- % (mname, obj))
- elif opcode == 86: # newarray
- arg_count = u30()
- arr = []
- for i in range(arg_count):
- arr.append(stack.pop())
- arr = arr[::-1]
- stack.append(arr)
- elif opcode == 93: # findpropstrict
- index = u30()
- mname = self.multinames[index]
- for s in reversed(scopes):
- if mname in s:
- res = s
- break
- else:
- res = scopes[0]
- if mname not in res and mname in _builtin_classes:
- stack.append(_builtin_classes[mname])
- else:
- stack.append(res[mname])
- elif opcode == 94: # findproperty
- index = u30()
- mname = self.multinames[index]
- for s in reversed(scopes):
- if mname in s:
- res = s
- break
- else:
- res = avm_class.variables
- stack.append(res)
- elif opcode == 96: # getlex
- index = u30()
- mname = self.multinames[index]
- for s in reversed(scopes):
- if mname in s:
- scope = s
- break
- else:
- scope = avm_class.variables
-
- if mname in scope:
- res = scope[mname]
- elif mname in _builtin_classes:
- res = _builtin_classes[mname]
- else:
- # Assume uninitialized
- # TODO warn here
- res = undefined
- stack.append(res)
- elif opcode == 97: # setproperty
- index = u30()
- value = stack.pop()
- idx = self.multinames[index]
- if isinstance(idx, _Multiname):
- idx = stack.pop()
- obj = stack.pop()
- obj[idx] = value
- elif opcode == 98: # getlocal
- index = u30()
- stack.append(registers[index])
- elif opcode == 99: # setlocal
- index = u30()
- value = stack.pop()
- registers[index] = value
- elif opcode == 102: # getproperty
- index = u30()
- pname = self.multinames[index]
- if pname == 'length':
- obj = stack.pop()
- assert isinstance(obj, (compat_str, list))
- stack.append(len(obj))
- elif isinstance(pname, compat_str): # Member access
- obj = stack.pop()
- if isinstance(obj, _AVMClass):
- res = obj.static_properties[pname]
- stack.append(res)
- continue
-
- assert isinstance(obj, (dict, _ScopeDict)),\
- 'Accessing member %r on %r' % (pname, obj)
- res = obj.get(pname, undefined)
- stack.append(res)
- else: # Assume attribute access
- idx = stack.pop()
- assert isinstance(idx, int)
- obj = stack.pop()
- assert isinstance(obj, list)
- stack.append(obj[idx])
- elif opcode == 104: # initproperty
- index = u30()
- value = stack.pop()
- idx = self.multinames[index]
- if isinstance(idx, _Multiname):
- idx = stack.pop()
- obj = stack.pop()
- obj[idx] = value
- elif opcode == 115: # convert_
- value = stack.pop()
- intvalue = int(value)
- stack.append(intvalue)
- elif opcode == 128: # coerce
- u30()
- elif opcode == 130: # coerce_a
- value = stack.pop()
- # um, yes, it's any value
- stack.append(value)
- elif opcode == 133: # coerce_s
- assert isinstance(stack[-1], (type(None), compat_str))
- elif opcode == 147: # decrement
- value = stack.pop()
- assert isinstance(value, int)
- stack.append(value - 1)
- elif opcode == 149: # typeof
- value = stack.pop()
- return {
- _Undefined: 'undefined',
- compat_str: 'String',
- int: 'Number',
- float: 'Number',
- }[type(value)]
- elif opcode == 160: # add
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 + value2
- stack.append(res)
- elif opcode == 161: # subtract
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 - value2
- stack.append(res)
- elif opcode == 162: # multiply
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 * value2
- stack.append(res)
- elif opcode == 164: # modulo
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 % value2
- stack.append(res)
- elif opcode == 168: # bitand
- value2 = stack.pop()
- value1 = stack.pop()
- assert isinstance(value1, int)
- assert isinstance(value2, int)
- res = value1 & value2
- stack.append(res)
- elif opcode == 171: # equals
- value2 = stack.pop()
- value1 = stack.pop()
- result = value1 == value2
- stack.append(result)
- elif opcode == 175: # greaterequals
- value2 = stack.pop()
- value1 = stack.pop()
- result = value1 >= value2
- stack.append(result)
- elif opcode == 192: # increment_i
- value = stack.pop()
- assert isinstance(value, int)
- stack.append(value + 1)
- elif opcode == 208: # getlocal_0
- stack.append(registers[0])
- elif opcode == 209: # getlocal_1
- stack.append(registers[1])
- elif opcode == 210: # getlocal_2
- stack.append(registers[2])
- elif opcode == 211: # getlocal_3
- stack.append(registers[3])
- elif opcode == 212: # setlocal_0
- registers[0] = stack.pop()
- elif opcode == 213: # setlocal_1
- registers[1] = stack.pop()
- elif opcode == 214: # setlocal_2
- registers[2] = stack.pop()
- elif opcode == 215: # setlocal_3
- registers[3] = stack.pop()
- else:
- raise NotImplementedError(
- 'Unsupported opcode %d' % opcode)
-
- avm_class.method_pyfunctions[func_name] = resfunc
- return resfunc
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
deleted file mode 100644
index ebce966..0000000
--- a/youtube_dl/update.py
+++ /dev/null
@@ -1,187 +0,0 @@
-from __future__ import unicode_literals
-
-import io
-import json
-import traceback
-import hashlib
-import os
-import subprocess
-import sys
-from zipimport import zipimporter
-
-from .utils import encode_compat_str
-
-from .version import __version__
-
-
-def rsa_verify(message, signature, key):
- from hashlib import sha256
- assert isinstance(message, bytes)
- byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8
- signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode()
- signature = (byte_size * 2 - len(signature)) * b'0' + signature
- asn1 = b'3031300d060960864801650304020105000420'
- asn1 += sha256(message).hexdigest().encode()
- if byte_size < len(asn1) // 2 + 11:
- return False
- expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1
- return expected == signature
-
-
-def update_self(to_screen, verbose, opener):
- """Update the program file with the latest version from the repository"""
-
- UPDATE_URL = 'https://rg3.github.io/youtube-dl/update/'
- VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
- JSON_URL = UPDATE_URL + 'versions.json'
- UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
- if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
- to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
- return
-
- # Check if there is a new version
- try:
- newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except Exception:
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: can\'t find the current version. Please try again later.')
- return
- if newversion == __version__:
- to_screen('youtube-dl is up-to-date (' + __version__ + ')')
- return
-
- # Download and check versions info
- try:
- versions_info = opener.open(JSON_URL).read().decode('utf-8')
- versions_info = json.loads(versions_info)
- except Exception:
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: can\'t obtain versions info. Please try again later.')
- return
- if 'signature' not in versions_info:
- to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
- return
- signature = versions_info['signature']
- del versions_info['signature']
- if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
- to_screen('ERROR: the versions file signature is invalid. Aborting.')
- return
-
- version_id = versions_info['latest']
-
- def version_tuple(version_str):
- return tuple(map(int, version_str.split('.')))
- if version_tuple(__version__) >= version_tuple(version_id):
- to_screen('youtube-dl is up to date (%s)' % __version__)
- return
-
- to_screen('Updating to version ' + version_id + ' ...')
- version = versions_info['versions'][version_id]
-
- print_notes(to_screen, versions_info['versions'])
-
- # sys.executable is set to the full pathname of the exe-file for py2exe
- filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
-
- if not os.access(filename, os.W_OK):
- to_screen('ERROR: no write permissions on %s' % filename)
- return
-
- # Py2EXE
- if hasattr(sys, 'frozen'):
- exe = filename
- directory = os.path.dirname(exe)
- if not os.access(directory, os.W_OK):
- to_screen('ERROR: no write permissions on %s' % directory)
- return
-
- try:
- urlh = opener.open(version['exe'][0])
- newcontent = urlh.read()
- urlh.close()
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to download latest version')
- return
-
- newcontent_hash = hashlib.sha256(newcontent).hexdigest()
- if newcontent_hash != version['exe'][1]:
- to_screen('ERROR: the downloaded file hash does not match. Aborting.')
- return
-
- try:
- with open(exe + '.new', 'wb') as outf:
- outf.write(newcontent)
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to write the new version')
- return
-
- try:
- bat = os.path.join(directory, 'youtube-dl-updater.bat')
- with io.open(bat, 'w') as batfile:
- batfile.write('''
-@echo off
-echo Waiting for file handle to be closed ...
-ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s" > NUL
-echo Updated youtube-dl to version %s.
-start /b "" cmd /c del "%%~f0"&exit /b"
- \n''' % (exe, exe, version_id))
-
- subprocess.Popen([bat]) # Continues to run in the background
- return # Do not show premature success messages
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to overwrite current version')
- return
-
- # Zip unix package
- elif isinstance(globals().get('__loader__'), zipimporter):
- try:
- urlh = opener.open(version['bin'][0])
- newcontent = urlh.read()
- urlh.close()
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to download latest version')
- return
-
- newcontent_hash = hashlib.sha256(newcontent).hexdigest()
- if newcontent_hash != version['bin'][1]:
- to_screen('ERROR: the downloaded file hash does not match. Aborting.')
- return
-
- try:
- with open(filename, 'wb') as outf:
- outf.write(newcontent)
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to overwrite current version')
- return
-
- to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
-
-
-def get_notes(versions, fromVersion):
- notes = []
- for v, vdata in sorted(versions.items()):
- if v > fromVersion:
- notes.extend(vdata.get('notes', []))
- return notes
-
-
-def print_notes(to_screen, versions, fromVersion=__version__):
- notes = get_notes(versions, fromVersion)
- if notes:
- to_screen('PLEASE NOTE:')
- for note in notes:
- to_screen(note)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
deleted file mode 100644
index d2d3c1a..0000000
--- a/youtube_dl/utils.py
+++ /dev/null
@@ -1,3990 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import base64
-import binascii
-import calendar
-import codecs
-import contextlib
-import ctypes
-import datetime
-import email.utils
-import email.header
-import errno
-import functools
-import gzip
-import io
-import itertools
-import json
-import locale
-import math
-import operator
-import os
-import platform
-import random
-import re
-import socket
-import ssl
-import subprocess
-import sys
-import tempfile
-import traceback
-import xml.etree.ElementTree
-import zlib
-
-from .compat import (
- compat_HTMLParseError,
- compat_HTMLParser,
- compat_basestring,
- compat_chr,
- compat_cookiejar,
- compat_ctypes_WINFUNCTYPE,
- compat_etree_fromstring,
- compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
- compat_http_client,
- compat_kwargs,
- compat_os_name,
- compat_parse_qs,
- compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_parse_unquote_plus,
- compat_urllib_request,
- compat_urlparse,
- compat_xpath,
-)
-
-from .socks import (
- ProxyType,
- sockssocket,
-)
-
-
-def register_socks_protocols():
- # "Register" SOCKS protocols
- # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
- # URLs with protocols not in urlparse.uses_netloc are not handled correctly
- for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
-
-
-# This is not clearly defined otherwise
-compiled_regex_type = type(re.compile(''))
-
-std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
- 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Encoding': 'gzip, deflate',
- 'Accept-Language': 'en-us,en;q=0.5',
-}
-
-
-USER_AGENTS = {
- 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
-}
-
-
-NO_DEFAULT = object()
-
-ENGLISH_MONTH_NAMES = [
- 'January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October', 'November', 'December']
-
-MONTH_NAMES = {
- 'en': ENGLISH_MONTH_NAMES,
- 'fr': [
- 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
- 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
-}
-
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
-
-# needed for sanitizing filenames in restricted mode
-ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
-
-DATE_FORMATS = (
- '%d %B %Y',
- '%d %b %Y',
- '%B %d %Y',
- '%B %dst %Y',
- '%B %dnd %Y',
- '%B %dth %Y',
- '%b %d %Y',
- '%b %dst %Y',
- '%b %dnd %Y',
- '%b %dth %Y',
- '%b %dst %Y %I:%M',
- '%b %dnd %Y %I:%M',
- '%b %dth %Y %I:%M',
- '%Y %m %d',
- '%Y-%m-%d',
- '%Y/%m/%d',
- '%Y/%m/%d %H:%M',
- '%Y/%m/%d %H:%M:%S',
- '%Y-%m-%d %H:%M',
- '%Y-%m-%d %H:%M:%S',
- '%Y-%m-%d %H:%M:%S.%f',
- '%d.%m.%Y %H:%M',
- '%d.%m.%Y %H.%M',
- '%Y-%m-%dT%H:%M:%SZ',
- '%Y-%m-%dT%H:%M:%S.%fZ',
- '%Y-%m-%dT%H:%M:%S.%f0Z',
- '%Y-%m-%dT%H:%M:%S',
- '%Y-%m-%dT%H:%M:%S.%f',
- '%Y-%m-%dT%H:%M',
- '%b %d %Y at %H:%M',
- '%b %d %Y at %H:%M:%S',
- '%B %d %Y at %H:%M',
- '%B %d %Y at %H:%M:%S',
-)
-
-DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
-DATE_FORMATS_DAY_FIRST.extend([
- '%d-%m-%Y',
- '%d.%m.%Y',
- '%d.%m.%y',
- '%d/%m/%Y',
- '%d/%m/%y',
- '%d/%m/%Y %H:%M:%S',
-])
-
-DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
-DATE_FORMATS_MONTH_FIRST.extend([
- '%m-%d-%Y',
- '%m.%d.%Y',
- '%m/%d/%Y',
- '%m/%d/%y',
- '%m/%d/%Y %H:%M:%S',
-])
-
-PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
-
-
-def preferredencoding():
- """Get preferred encoding.
-
- Returns the best encoding scheme for the system, based on
- locale.getpreferredencoding() and some further tweaks.
- """
- try:
- pref = locale.getpreferredencoding()
- 'TEST'.encode(pref)
- except Exception:
- pref = 'UTF-8'
-
- return pref
-
-
-def write_json_file(obj, fn):
- """ Encode obj as JSON and write it to fn, atomically if possible """
-
- fn = encodeFilename(fn)
- if sys.version_info < (3, 0) and sys.platform != 'win32':
- encoding = get_filesystem_encoding()
- # os.path.basename returns a bytes object, but NamedTemporaryFile
- # will fail if the filename contains non ascii characters unless we
- # use a unicode object
- path_basename = lambda f: os.path.basename(fn).decode(encoding)
- # the same for os.path.dirname
- path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
- else:
- path_basename = os.path.basename
- path_dirname = os.path.dirname
-
- args = {
- 'suffix': '.tmp',
- 'prefix': path_basename(fn) + '.',
- 'dir': path_dirname(fn),
- 'delete': False,
- }
-
- # In Python 2.x, json.dump expects a bytestream.
- # In Python 3.x, it writes to a character stream
- if sys.version_info < (3, 0):
- args['mode'] = 'wb'
- else:
- args.update({
- 'mode': 'w',
- 'encoding': 'utf-8',
- })
-
- tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
-
- try:
- with tf:
- json.dump(obj, tf)
- if sys.platform == 'win32':
- # Need to remove existing file on Windows, else os.rename raises
- # WindowsError or FileExistsError.
- try:
- os.unlink(fn)
- except OSError:
- pass
- os.rename(tf.name, fn)
- except Exception:
- try:
- os.remove(tf.name)
- except OSError:
- pass
- raise
-
-
-if sys.version_info >= (2, 7):
- def find_xpath_attr(node, xpath, key, val=None):
- """ Find the xpath xpath[@key=val] """
- assert re.match(r'^[a-zA-Z_-]+$', key)
- expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
- return node.find(expr)
-else:
- def find_xpath_attr(node, xpath, key, val=None):
- for f in node.findall(compat_xpath(xpath)):
- if key not in f.attrib:
- continue
- if val is None or f.attrib.get(key) == val:
- return f
- return None
-
-# On python2.6 the xml.etree.ElementTree.Element methods don't support
-# the namespace parameter
-
-
-def xpath_with_ns(path, ns_map):
- components = [c.split(':') for c in path.split('/')]
- replaced = []
- for c in components:
- if len(c) == 1:
- replaced.append(c[0])
- else:
- ns, tag = c
- replaced.append('{%s}%s' % (ns_map[ns], tag))
- return '/'.join(replaced)
-
-
-def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- def _find_xpath(xpath):
- return node.find(compat_xpath(xpath))
-
- if isinstance(xpath, (str, compat_str)):
- n = _find_xpath(xpath)
- else:
- for xp in xpath:
- n = _find_xpath(xp)
- if n is not None:
- break
-
- if n is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = xpath if name is None else name
- raise ExtractorError('Could not find XML element %s' % name)
- else:
- return None
- return n
-
-
-def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- n = xpath_element(node, xpath, name, fatal=fatal, default=default)
- if n is None or n == default:
- return n
- if n.text is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = xpath if name is None else name
- raise ExtractorError('Could not find XML element\'s text %s' % name)
- else:
- return None
- return n.text
-
-
-def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
- n = find_xpath_attr(node, xpath, key)
- if n is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = '%s[@%s]' % (xpath, key) if name is None else name
- raise ExtractorError('Could not find XML attribute %s' % name)
- else:
- return None
- return n.attrib[key]
-
-
-def get_element_by_id(id, html):
- """Return the content of the tag with the specified ID in the passed HTML document"""
- return get_element_by_attribute('id', id, html)
-
-
-def get_element_by_class(class_name, html):
- """Return the content of the first tag with the specified class in the passed HTML document"""
- retval = get_elements_by_class(class_name, html)
- return retval[0] if retval else None
-
-
-def get_element_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_by_attribute(attribute, value, html, escape_value)
- return retval[0] if retval else None
-
-
-def get_elements_by_class(class_name, html):
- """Return the content of all tags with the specified class in the passed HTML document as a list"""
- return get_elements_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
- html, escape_value=False)
-
-
-def get_elements_by_attribute(attribute, value, html, escape_value=True):
- """Return the content of the tag with the specified attribute in the passed HTML document"""
-
- value = re.escape(value) if escape_value else value
-
- retlist = []
- for m in re.finditer(r'''(?xs)
- <([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
- \s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
- \s*>
- (?P<content>.*?)
- </\1>
- ''' % (re.escape(attribute), value), html):
- res = m.group('content')
-
- if res.startswith('"') or res.startswith("'"):
- res = res[1:-1]
-
- retlist.append(unescapeHTML(res))
-
- return retlist
-
-
-class HTMLAttributeParser(compat_HTMLParser):
- """Trivial HTML parser to gather the attributes for a single element"""
- def __init__(self):
- self.attrs = {}
- compat_HTMLParser.__init__(self)
-
- def handle_starttag(self, tag, attrs):
- self.attrs = dict(attrs)
-
-
-def extract_attributes(html_element):
- """Given a string for an HTML element such as
- <el
- a="foo" B="bar" c="&98;az" d=boz
- empty= noval entity="&amp;"
- sq='"' dq="'"
- >
- Decode and return a dictionary of attributes.
- {
- 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
- 'empty': '', 'noval': None, 'entity': '&',
- 'sq': '"', 'dq': '\''
- }.
- NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
- but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
- """
- parser = HTMLAttributeParser()
- try:
- parser.feed(html_element)
- parser.close()
- # Older Python may throw HTMLParseError in case of malformed HTML
- except compat_HTMLParseError:
- pass
- return parser.attrs
-
-
-def clean_html(html):
- """Clean an HTML snippet into a readable string"""
-
- if html is None: # Convenience for sanitizing descriptions etc.
- return html
-
- # Newline vs <br />
- html = html.replace('\n', ' ')
- html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
- html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
- # Strip html tags
- html = re.sub('<.*?>', '', html)
- # Replace html entities
- html = unescapeHTML(html)
- return html.strip()
-
-
-def sanitize_open(filename, open_mode):
- """Try to open the given filename, and slightly tweak it if this fails.
-
- Attempts to open the given filename. If this fails, it tries to change
- the filename slightly, step by step, until it's either able to open it
- or it fails and raises a final exception, like the standard open()
- function.
-
- It returns the tuple (stream, definitive_file_name).
- """
- try:
- if filename == '-':
- if sys.platform == 'win32':
- import msvcrt
- msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
- return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
- except (IOError, OSError) as err:
- if err.errno in (errno.EACCES,):
- raise
-
- # In case of error, try to remove win32 forbidden chars
- alt_filename = sanitize_path(filename)
- if alt_filename == filename:
- raise
- else:
- # An exception here should be caught in the caller
- stream = open(encodeFilename(alt_filename), open_mode)
- return (stream, alt_filename)
-
-
-def timeconvert(timestr):
- """Convert RFC 2822 defined time string into system timestamp"""
- timestamp = None
- timetuple = email.utils.parsedate_tz(timestr)
- if timetuple is not None:
- timestamp = email.utils.mktime_tz(timetuple)
- return timestamp
-
-
-def sanitize_filename(s, restricted=False, is_id=False):
- """Sanitizes a string so it could be used as part of a filename.
- If restricted is set, use a stricter subset of allowed characters.
- Set is_id if this is not an arbitrary string, but an ID that should be kept
- if possible.
- """
- def replace_insane(char):
- if restricted and char in ACCENT_CHARS:
- return ACCENT_CHARS[char]
- if char == '?' or ord(char) < 32 or ord(char) == 127:
- return ''
- elif char == '"':
- return '' if restricted else '\''
- elif char == ':':
- return '_-' if restricted else ' -'
- elif char in '\\/|*<>':
- return '_'
- if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
- return '_'
- if restricted and ord(char) > 127:
- return '_'
- return char
-
- # Handle timestamps
- s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
- result = ''.join(map(replace_insane, s))
- if not is_id:
- while '__' in result:
- result = result.replace('__', '_')
- result = result.strip('_')
- # Common case of "Foreign band name - English song title"
- if restricted and result.startswith('-_'):
- result = result[2:]
- if result.startswith('-'):
- result = '_' + result[len('-'):]
- result = result.lstrip('.')
- if not result:
- result = '_'
- return result
-
-
-def sanitize_path(s):
- """Sanitizes and normalizes path on Windows"""
- if sys.platform != 'win32':
- return s
- drive_or_unc, _ = os.path.splitdrive(s)
- if sys.version_info < (2, 7) and not drive_or_unc:
- drive_or_unc, _ = os.path.splitunc(s)
- norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
- if drive_or_unc:
- norm_path.pop(0)
- sanitized_path = [
- path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
- for path_part in norm_path]
- if drive_or_unc:
- sanitized_path.insert(0, drive_or_unc + os.path.sep)
- return os.path.join(*sanitized_path)
-
-
-def sanitize_url(url):
- # Prepend protocol-less URLs with `http:` scheme in order to mitigate
- # the number of unwanted failures due to missing protocol
- if url.startswith('//'):
- return 'http:%s' % url
- # Fix some common typos seen so far
- COMMON_TYPOS = (
- # https://github.com/rg3/youtube-dl/issues/15649
- (r'^httpss://', r'https://'),
- # https://bx1.be/lives/direct-tv/
- (r'^rmtp([es]?)://', r'rtmp\1://'),
- )
- for mistake, fixup in COMMON_TYPOS:
- if re.match(mistake, url):
- return re.sub(mistake, fixup, url)
- return url
-
-
-def sanitized_Request(url, *args, **kwargs):
- return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
-
-
-def expand_path(s):
- """Expand shell variables and ~"""
- return os.path.expandvars(compat_expanduser(s))
-
-
-def orderedSet(iterable):
- """ Remove all duplicates from the input iterable """
- res = []
- for el in iterable:
- if el not in res:
- res.append(el)
- return res
-
-
-def _htmlentity_transform(entity_with_semicolon):
- """Transforms an HTML entity to a character."""
- entity = entity_with_semicolon[:-1]
-
- # Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
-
- # TODO: HTML5 allows entities without a semicolon. For example,
- # '&Eacuteric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
-
- mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
- if mobj is not None:
- numstr = mobj.group(1)
- if numstr.startswith('x'):
- base = 16
- numstr = '0%s' % numstr
- else:
- base = 10
- # See https://github.com/rg3/youtube-dl/issues/7518
- try:
- return compat_chr(int(numstr, base))
- except ValueError:
- pass
-
- # Unknown entity in name, return its literal representation
- return '&%s;' % entity
-
-
-def unescapeHTML(s):
- if s is None:
- return None
- assert type(s) == compat_str
-
- return re.sub(
- r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
-
-
-def get_subprocess_encoding():
- if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- # For subprocess calls, encode with locale encoding
- # Refer to http://stackoverflow.com/a/9951851/35070
- encoding = preferredencoding()
- else:
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
- return encoding
-
-
-def encodeFilename(s, for_subprocess=False):
- """
- @param s The name of the file
- """
-
- assert type(s) == compat_str
-
- # Python 3 has a Unicode API
- if sys.version_info >= (3, 0):
- return s
-
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- return s
-
- # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
- if sys.platform.startswith('java'):
- return s
-
- return s.encode(get_subprocess_encoding(), 'ignore')
-
-
-def decodeFilename(b, for_subprocess=False):
-
- if sys.version_info >= (3, 0):
- return b
-
- if not isinstance(b, bytes):
- return b
-
- return b.decode(get_subprocess_encoding(), 'ignore')
-
-
-def encodeArgument(s):
- if not isinstance(s, compat_str):
- # Legacy code that uses byte strings
- # Uncomment the following line after fixing all post processors
- # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
- s = s.decode('ascii')
- return encodeFilename(s, True)
-
-
-def decodeArgument(b):
- return decodeFilename(b, True)
-
-
-def decodeOption(optval):
- if optval is None:
- return optval
- if isinstance(optval, bytes):
- optval = optval.decode(preferredencoding())
-
- assert isinstance(optval, compat_str)
- return optval
-
-
-def formatSeconds(secs):
- if secs > 3600:
- return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
- elif secs > 60:
- return '%d:%02d' % (secs // 60, secs % 60)
- else:
- return '%d' % secs
-
-
-def make_HTTPS_handler(params, **kwargs):
- opts_no_check_certificate = params.get('nocheckcertificate', False)
- if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
- context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
- if opts_no_check_certificate:
- context.check_hostname = False
- context.verify_mode = ssl.CERT_NONE
- try:
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
- except TypeError:
- # Python 2.7.8
- # (create_default_context present but HTTPSHandler has no context=)
- pass
-
- if sys.version_info < (3, 2):
- return YoutubeDLHTTPSHandler(params, **kwargs)
- else: # Python < 3.4
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.verify_mode = (ssl.CERT_NONE
- if opts_no_check_certificate
- else ssl.CERT_REQUIRED)
- context.set_default_verify_paths()
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
-
-
-def bug_reports_message():
- if ytdl_is_updateable():
- update_cmd = 'type youtube-dl -U to update'
- else:
- update_cmd = 'see https://yt-dl.org/update on how to update'
- msg = '; please report this issue on https://yt-dl.org/bug .'
- msg += ' Make sure you are using the latest version; %s.' % update_cmd
- msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
- return msg
-
-
-class YoutubeDLError(Exception):
- """Base exception for YoutubeDL errors."""
- pass
-
-
-class ExtractorError(YoutubeDLError):
- """Error during info extraction."""
-
- def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
- """ tb, if given, is the original traceback (so that it can be printed out).
- If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
- """
-
- if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
- expected = True
- if video_id is not None:
- msg = video_id + ': ' + msg
- if cause:
- msg += ' (caused by %r)' % cause
- if not expected:
- msg += bug_reports_message()
- super(ExtractorError, self).__init__(msg)
-
- self.traceback = tb
- self.exc_info = sys.exc_info() # preserve original exception
- self.cause = cause
- self.video_id = video_id
-
- def format_traceback(self):
- if self.traceback is None:
- return None
- return ''.join(traceback.format_tb(self.traceback))
-
-
-class UnsupportedError(ExtractorError):
- def __init__(self, url):
- super(UnsupportedError, self).__init__(
- 'Unsupported URL: %s' % url, expected=True)
- self.url = url
-
-
-class RegexNotFoundError(ExtractorError):
- """Error when a regex didn't match"""
- pass
-
-
-class GeoRestrictedError(ExtractorError):
- """Geographic restriction Error exception.
-
- This exception may be thrown when a video is not available from your
- geographic location due to geographic restrictions imposed by a website.
- """
- def __init__(self, msg, countries=None):
- super(GeoRestrictedError, self).__init__(msg, expected=True)
- self.msg = msg
- self.countries = countries
-
-
-class DownloadError(YoutubeDLError):
- """Download Error exception.
-
- This exception may be thrown by FileDownloader objects if they are not
- configured to continue on errors. They will contain the appropriate
- error message.
- """
-
- def __init__(self, msg, exc_info=None):
- """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
- super(DownloadError, self).__init__(msg)
- self.exc_info = exc_info
-
-
-class SameFileError(YoutubeDLError):
- """Same File exception.
-
- This exception will be thrown by FileDownloader objects if they detect
- multiple files would have to be downloaded to the same file on disk.
- """
- pass
-
-
-class PostProcessingError(YoutubeDLError):
- """Post Processing exception.
-
- This exception may be raised by PostProcessor's .run() method to
- indicate an error in the postprocessing task.
- """
-
- def __init__(self, msg):
- super(PostProcessingError, self).__init__(msg)
- self.msg = msg
-
-
-class MaxDownloadsReached(YoutubeDLError):
- """ --max-downloads limit has been reached. """
- pass
-
-
-class UnavailableVideoError(YoutubeDLError):
- """Unavailable Format exception.
-
- This exception will be thrown when a video is requested
- in a format that is not available for that video.
- """
- pass
-
-
-class ContentTooShortError(YoutubeDLError):
- """Content Too Short exception.
-
- This exception may be raised by FileDownloader objects when a file they
- download is too small for what the server announced first, indicating
- the connection was probably interrupted.
- """
-
- def __init__(self, downloaded, expected):
- super(ContentTooShortError, self).__init__(
- 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
- )
- # Both in bytes
- self.downloaded = downloaded
- self.expected = expected
-
-
-class XAttrMetadataError(YoutubeDLError):
- def __init__(self, code=None, msg='Unknown error'):
- super(XAttrMetadataError, self).__init__(msg)
- self.code = code
- self.msg = msg
-
- # Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT) or
- 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
- self.reason = 'NO_SPACE'
- elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
- self.reason = 'VALUE_TOO_LONG'
- else:
- self.reason = 'NOT_SUPPORTED'
-
-
-class XAttrUnavailableError(YoutubeDLError):
- pass
-
-
-def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
- # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
- # expected HTTP responses to meet HTTP/1.0 or later (see also
- # https://github.com/rg3/youtube-dl/issues/6727)
- if sys.version_info < (3, 0):
- kwargs['strict'] = True
- hc = http_class(*args, **compat_kwargs(kwargs))
- source_address = ydl_handler._params.get('source_address')
-
- if source_address is not None:
- # This is to workaround _create_connection() from socket where it will try all
- # address data from getaddrinfo() including IPv6. This filters the result from
- # getaddrinfo() based on the source_address value.
- # This is based on the cpython socket.create_connection() function.
- # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
- def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
- host, port = address
- err = None
- addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
- af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
- ip_addrs = [addr for addr in addrs if addr[0] == af]
- if addrs and not ip_addrs:
- ip_version = 'v4' if af == socket.AF_INET else 'v6'
- raise socket.error(
- "No remote IP%s addresses available for connect, can't use '%s' as source address"
- % (ip_version, source_address[0]))
- for res in ip_addrs:
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket.socket(af, socktype, proto)
- if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
- sock.settimeout(timeout)
- sock.bind(source_address)
- sock.connect(sa)
- err = None # Explicitly break reference cycle
- return sock
- except socket.error as _:
- err = _
- if sock is not None:
- sock.close()
- if err is not None:
- raise err
- else:
- raise socket.error('getaddrinfo returns an empty list')
- if hasattr(hc, '_create_connection'):
- hc._create_connection = _create_connection
- sa = (source_address, 0)
- if hasattr(hc, 'source_address'): # Python 2.7+
- hc.source_address = sa
- else: # Python 2.6
- def _hc_connect(self, *args, **kwargs):
- sock = _create_connection(
- (self.host, self.port), self.timeout, sa)
- if is_https:
- self.sock = ssl.wrap_socket(
- sock, self.key_file, self.cert_file,
- ssl_version=ssl.PROTOCOL_TLSv1)
- else:
- self.sock = sock
- hc.connect = functools.partial(_hc_connect, hc)
-
- return hc
-
-
-def handle_youtubedl_headers(headers):
- filtered_headers = headers
-
- if 'Youtubedl-no-compression' in filtered_headers:
- filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
- del filtered_headers['Youtubedl-no-compression']
-
- return filtered_headers
-
-
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
- """Handler for HTTP requests and responses.
-
- This class, when installed with an OpenerDirector, automatically adds
- the standard headers to every HTTP request and handles gzipped and
- deflated responses from web servers. If compression is to be avoided in
- a particular request, the original request in the program code only has
- to include the HTTP header "Youtubedl-no-compression", which will be
- removed before making the real request.
-
- Part of this code was copied from:
-
- http://techknack.net/python-urllib2-handlers/
-
- Andrew Rowls, the author of that code, agreed to release it to the
- public domain.
- """
-
- def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
- self._params = params
-
- def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
-
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
- if socks_proxy:
- conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
-
- return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, False),
- req)
-
- @staticmethod
- def deflate(data):
- try:
- return zlib.decompress(data, -zlib.MAX_WBITS)
- except zlib.error:
- return zlib.decompress(data)
-
- def http_request(self, req):
- # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
- # always respected by websites, some tend to give out URLs with non percent-encoded
- # non-ASCII characters (see telemb.py, ard.py [#3412])
- # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
- # To work around aforementioned issue we will replace request's original URL with
- # percent-encoded one
- # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
- # the code of this workaround has been moved here from YoutubeDL.urlopen()
- url = req.get_full_url()
- url_escaped = escape_url(url)
-
- # Substitute URL if any change after escaping
- if url != url_escaped:
- req = update_Request(req, url=url_escaped)
-
- for h, v in std_headers.items():
- # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
- # The dict keys are capitalized because of this bug by urllib
- if h.capitalize() not in req.headers:
- req.add_header(h, v)
-
- req.headers = handle_youtubedl_headers(req.headers)
-
- if sys.version_info < (2, 7) and '#' in req.get_full_url():
- # Python 2.6 is brain-dead when it comes to fragments
- req._Request__original = req._Request__original.partition('#')[0]
- req._Request__r_type = req._Request__r_type.partition('#')[0]
-
- return req
-
- def http_response(self, req, resp):
- old_resp = resp
- # gzip
- if resp.headers.get('Content-encoding', '') == 'gzip':
- content = resp.read()
- gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
- try:
- uncompressed = io.BytesIO(gz.read())
- except IOError as original_ioerror:
- # There may be junk add the end of the file
- # See http://stackoverflow.com/q/4928560/35070 for details
- for i in range(1, 1024):
- try:
- gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
- uncompressed = io.BytesIO(gz.read())
- except IOError:
- continue
- break
- else:
- raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- del resp.headers['Content-encoding']
- # deflate
- if resp.headers.get('Content-encoding', '') == 'deflate':
- gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- del resp.headers['Content-encoding']
- # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
- # https://github.com/rg3/youtube-dl/issues/6457).
- if 300 <= resp.code < 400:
- location = resp.headers.get('Location')
- if location:
- # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
- if sys.version_info >= (3, 0):
- location = location.encode('iso-8859-1').decode('utf-8')
- else:
- location = location.decode('utf-8')
- location_escaped = escape_url(location)
- if location != location_escaped:
- del resp.headers['Location']
- if sys.version_info < (3, 0):
- location_escaped = location_escaped.encode('utf-8')
- resp.headers['Location'] = location_escaped
- return resp
-
- https_request = http_request
- https_response = http_response
-
-
-def make_socks_conn_class(base_class, socks_proxy):
- assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
-
- url_components = compat_urlparse.urlparse(socks_proxy)
- if url_components.scheme.lower() == 'socks5':
- socks_type = ProxyType.SOCKS5
- elif url_components.scheme.lower() in ('socks', 'socks4'):
- socks_type = ProxyType.SOCKS4
- elif url_components.scheme.lower() == 'socks4a':
- socks_type = ProxyType.SOCKS4A
-
- def unquote_if_non_empty(s):
- if not s:
- return s
- return compat_urllib_parse_unquote_plus(s)
-
- proxy_args = (
- socks_type,
- url_components.hostname, url_components.port or 1080,
- True, # Remote DNS
- unquote_if_non_empty(url_components.username),
- unquote_if_non_empty(url_components.password),
- )
-
- class SocksConnection(base_class):
- def connect(self):
- self.sock = sockssocket()
- self.sock.setproxy(*proxy_args)
- if type(self.timeout) in (int, float):
- self.sock.settimeout(self.timeout)
- self.sock.connect((self.host, self.port))
-
- if isinstance(self, compat_http_client.HTTPSConnection):
- if hasattr(self, '_context'): # Python > 2.6
- self.sock = self._context.wrap_socket(
- self.sock, server_hostname=self.host)
- else:
- self.sock = ssl.wrap_socket(self.sock)
-
- return SocksConnection
-
-
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
- def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
- self._params = params
-
- def https_open(self, req):
- kwargs = {}
- conn_class = self._https_conn_class
-
- if hasattr(self, '_context'): # python > 2.6
- kwargs['context'] = self._context
- if hasattr(self, '_check_hostname'): # python 3.x
- kwargs['check_hostname'] = self._check_hostname
-
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
- if socks_proxy:
- conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
-
- return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, True),
- req, **kwargs)
-
-
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- # Store session cookies with `expires` set to 0 instead of an empty
- # string
- for cookie in self:
- if cookie.expires is None:
- cookie.expires = 0
- compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
- # Session cookies are denoted by either `expires` field set to
- # an empty string or 0. MozillaCookieJar only recognizes the former
- # (see [1]). So we need force the latter to be recognized as session
- # cookies on our own.
- # Session cookies may be important for cookies-based authentication,
- # e.g. usually, when user does not check 'Remember me' check box while
- # logging in on a site, some important cookies are stored as session
- # cookies so that not recognizing them will result in failed login.
- # 1. https://bugs.python.org/issue17164
- for cookie in self:
- # Treat `expires=0` cookies as session cookies
- if cookie.expires == 0:
- cookie.expires = None
- cookie.discard = True
-
-
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
- def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
-
- def http_response(self, request, response):
- # Python 2 will choke on next HTTP request in row if there are non-ASCII
- # characters in Set-Cookie HTTP header of last response (see
- # https://github.com/rg3/youtube-dl/issues/6769).
- # In order to at least prevent crashing we will percent encode Set-Cookie
- # header before HTTPCookieProcessor starts processing it.
- # if sys.version_info < (3, 0) and response.headers:
- # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
- # set_cookie = response.headers.get(set_cookie_header)
- # if set_cookie:
- # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
- # if set_cookie != set_cookie_escaped:
- # del response.headers[set_cookie_header]
- # response.headers[set_cookie_header] = set_cookie_escaped
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
-
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
- https_response = http_response
-
-
-def extract_timezone(date_str):
- m = re.search(
- r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
- if not m:
- timezone = datetime.timedelta()
- else:
- date_str = date_str[:-len(m.group('tz'))]
- if not m.group('sign'):
- timezone = datetime.timedelta()
- else:
- sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
- hours=sign * int(m.group('hours')),
- minutes=sign * int(m.group('minutes')))
- return timezone, date_str
-
-
-def parse_iso8601(date_str, delimiter='T', timezone=None):
- """ Return a UNIX timestamp from the given date """
-
- if date_str is None:
- return None
-
- date_str = re.sub(r'\.[0-9]+', '', date_str)
-
- if timezone is None:
- timezone, date_str = extract_timezone(date_str)
-
- try:
- date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
- dt = datetime.datetime.strptime(date_str, date_format) - timezone
- return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
-
-
-def date_formats(day_first=True):
- return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
-
-
-def unified_strdate(date_str, day_first=True):
- """Return a string with the date in the format YYYYMMDD"""
-
- if date_str is None:
- return None
- upload_date = None
- # Replace commas
- date_str = date_str.replace(',', ' ')
- # Remove AM/PM + timezone
- date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
- _, date_str = extract_timezone(date_str)
-
- for expression in date_formats(day_first):
- try:
- upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
- except ValueError:
- pass
- if upload_date is None:
- timetuple = email.utils.parsedate_tz(date_str)
- if timetuple:
- try:
- upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- except ValueError:
- pass
- if upload_date is not None:
- return compat_str(upload_date)
-
-
-def unified_timestamp(date_str, day_first=True):
- if date_str is None:
- return None
-
- date_str = re.sub(r'[,|]', '', date_str)
-
- pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
- timezone, date_str = extract_timezone(date_str)
-
- # Remove AM/PM + timezone
- date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
-
- # Remove unrecognized timezones from ISO 8601 alike timestamps
- m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
- if m:
- date_str = date_str[:-len(m.group('tz'))]
-
- # Python only supports microseconds, so remove nanoseconds
- m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
- if m:
- date_str = m.group(1)
-
- for expression in date_formats(day_first):
- try:
- dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
- return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
- timetuple = email.utils.parsedate_tz(date_str)
- if timetuple:
- return calendar.timegm(timetuple) + pm_delta * 3600
-
-
-def determine_ext(url, default_ext='unknown_video'):
- if url is None or '.' not in url:
- return default_ext
- guess = url.partition('?')[0].rpartition('.')[2]
- if re.match(r'^[A-Za-z0-9]+$', guess):
- return guess
- # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
- elif guess.rstrip('/') in KNOWN_EXTENSIONS:
- return guess.rstrip('/')
- else:
- return default_ext
-
-
-def subtitles_filename(filename, sub_lang, sub_format):
- return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
-
-
-def date_from_str(date_str):
- """
- Return a datetime object from a string in the format YYYYMMDD or
- (now|today)[+-][0-9](day|week|month|year)(s)?"""
- today = datetime.date.today()
- if date_str in ('now', 'today'):
- return today
- if date_str == 'yesterday':
- return today - datetime.timedelta(days=1)
- match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
- if match is not None:
- sign = match.group('sign')
- time = int(match.group('time'))
- if sign == '-':
- time = -time
- unit = match.group('unit')
- # A bad approximation?
- if unit == 'month':
- unit = 'day'
- time *= 30
- elif unit == 'year':
- unit = 'day'
- time *= 365
- unit += 's'
- delta = datetime.timedelta(**{unit: time})
- return today + delta
- return datetime.datetime.strptime(date_str, '%Y%m%d').date()
-
-
-def hyphenate_date(date_str):
- """
- Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
- match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
- if match is not None:
- return '-'.join(match.groups())
- else:
- return date_str
-
-
-class DateRange(object):
- """Represents a time interval between two dates"""
-
- def __init__(self, start=None, end=None):
- """start and end must be strings in the format accepted by date"""
- if start is not None:
- self.start = date_from_str(start)
- else:
- self.start = datetime.datetime.min.date()
- if end is not None:
- self.end = date_from_str(end)
- else:
- self.end = datetime.datetime.max.date()
- if self.start > self.end:
- raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
-
- @classmethod
- def day(cls, day):
- """Returns a range that only contains the given day"""
- return cls(day, day)
-
- def __contains__(self, date):
- """Check if the date is in the range"""
- if not isinstance(date, datetime.date):
- date = date_from_str(date)
- return self.start <= date <= self.end
-
- def __str__(self):
- return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
-
-
-def platform_name():
- """ Returns the platform name as a compat_str """
- res = platform.platform()
- if isinstance(res, bytes):
- res = res.decode(preferredencoding())
-
- assert isinstance(res, compat_str)
- return res
-
-
-def _windows_write_string(s, out):
- """ Returns True if the string was written using special methods,
- False if it has yet to be written out."""
- # Adapted from http://stackoverflow.com/a/3259271/35070
-
- import ctypes
- import ctypes.wintypes
-
- WIN_OUTPUT_IDS = {
- 1: -11,
- 2: -12,
- }
-
- try:
- fileno = out.fileno()
- except AttributeError:
- # If the output stream doesn't have a fileno, it's virtual
- return False
- except io.UnsupportedOperation:
- # Some strange Windows pseudo files?
- return False
- if fileno not in WIN_OUTPUT_IDS:
- return False
-
- GetStdHandle = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
- ('GetStdHandle', ctypes.windll.kernel32))
- h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
-
- WriteConsoleW = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
- ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
- ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
- written = ctypes.wintypes.DWORD(0)
-
- GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
- FILE_TYPE_CHAR = 0x0002
- FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
- ctypes.POINTER(ctypes.wintypes.DWORD))(
- ('GetConsoleMode', ctypes.windll.kernel32))
- INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
-
- def not_a_console(handle):
- if handle == INVALID_HANDLE_VALUE or handle is None:
- return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
- GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
-
- if not_a_console(h):
- return False
-
- def next_nonbmp_pos(s):
- try:
- return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
- except StopIteration:
- return len(s)
-
- while s:
- count = min(next_nonbmp_pos(s), 1024)
-
- ret = WriteConsoleW(
- h, s, count if count else 2, ctypes.byref(written), None)
- if ret == 0:
- raise OSError('Failed to write string')
- if not count: # We just wrote a non-BMP character
- assert written.value == 2
- s = s[1:]
- else:
- assert written.value > 0
- s = s[written.value:]
- return True
-
-
-def write_string(s, out=None, encoding=None):
- if out is None:
- out = sys.stderr
- assert type(s) == compat_str
-
- if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
- if _windows_write_string(s, out):
- return
-
- if ('b' in getattr(out, 'mode', '') or
- sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
- byt = s.encode(encoding or preferredencoding(), 'ignore')
- out.write(byt)
- elif hasattr(out, 'buffer'):
- enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
- byt = s.encode(enc, 'ignore')
- out.buffer.write(byt)
- else:
- out.write(s)
- out.flush()
-
-
-def bytes_to_intlist(bs):
- if not bs:
- return []
- if isinstance(bs[0], int): # Python 3
- return list(bs)
- else:
- return [ord(c) for c in bs]
-
-
-def intlist_to_bytes(xs):
- if not xs:
- return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
-
-
-# Cross-platform file locking
-if sys.platform == 'win32':
- import ctypes.wintypes
- import msvcrt
-
- class OVERLAPPED(ctypes.Structure):
- _fields_ = [
- ('Internal', ctypes.wintypes.LPVOID),
- ('InternalHigh', ctypes.wintypes.LPVOID),
- ('Offset', ctypes.wintypes.DWORD),
- ('OffsetHigh', ctypes.wintypes.DWORD),
- ('hEvent', ctypes.wintypes.HANDLE),
- ]
-
- kernel32 = ctypes.windll.kernel32
- LockFileEx = kernel32.LockFileEx
- LockFileEx.argtypes = [
- ctypes.wintypes.HANDLE, # hFile
- ctypes.wintypes.DWORD, # dwFlags
- ctypes.wintypes.DWORD, # dwReserved
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
- ctypes.POINTER(OVERLAPPED) # Overlapped
- ]
- LockFileEx.restype = ctypes.wintypes.BOOL
- UnlockFileEx = kernel32.UnlockFileEx
- UnlockFileEx.argtypes = [
- ctypes.wintypes.HANDLE, # hFile
- ctypes.wintypes.DWORD, # dwReserved
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
- ctypes.POINTER(OVERLAPPED) # Overlapped
- ]
- UnlockFileEx.restype = ctypes.wintypes.BOOL
- whole_low = 0xffffffff
- whole_high = 0x7fffffff
-
- def _lock_file(f, exclusive):
- overlapped = OVERLAPPED()
- overlapped.Offset = 0
- overlapped.OffsetHigh = 0
- overlapped.hEvent = 0
- f._lock_file_overlapped_p = ctypes.pointer(overlapped)
- handle = msvcrt.get_osfhandle(f.fileno())
- if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
- whole_low, whole_high, f._lock_file_overlapped_p):
- raise OSError('Locking file failed: %r' % ctypes.FormatError())
-
- def _unlock_file(f):
- assert f._lock_file_overlapped_p
- handle = msvcrt.get_osfhandle(f.fileno())
- if not UnlockFileEx(handle, 0,
- whole_low, whole_high, f._lock_file_overlapped_p):
- raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
-
-else:
- # Some platforms, such as Jython, is missing fcntl
- try:
- import fcntl
-
- def _lock_file(f, exclusive):
- fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
-
- def _unlock_file(f):
- fcntl.flock(f, fcntl.LOCK_UN)
- except ImportError:
- UNSUPPORTED_MSG = 'file locking is not supported on this platform'
-
- def _lock_file(f, exclusive):
- raise IOError(UNSUPPORTED_MSG)
-
- def _unlock_file(f):
- raise IOError(UNSUPPORTED_MSG)
-
-
-class locked_file(object):
- def __init__(self, filename, mode, encoding=None):
- assert mode in ['r', 'a', 'w']
- self.f = io.open(filename, mode, encoding=encoding)
- self.mode = mode
-
- def __enter__(self):
- exclusive = self.mode != 'r'
- try:
- _lock_file(self.f, exclusive)
- except IOError:
- self.f.close()
- raise
- return self
-
- def __exit__(self, etype, value, traceback):
- try:
- _unlock_file(self.f)
- finally:
- self.f.close()
-
- def __iter__(self):
- return iter(self.f)
-
- def write(self, *args):
- return self.f.write(*args)
-
- def read(self, *args):
- return self.f.read(*args)
-
-
-def get_filesystem_encoding():
- encoding = sys.getfilesystemencoding()
- return encoding if encoding is not None else 'utf-8'
-
-
-def shell_quote(args):
- quoted_args = []
- encoding = get_filesystem_encoding()
- for a in args:
- if isinstance(a, bytes):
- # We may get a filename encoded with 'encodeFilename'
- a = a.decode(encoding)
- quoted_args.append(compat_shlex_quote(a))
- return ' '.join(quoted_args)
-
-
-def smuggle_url(url, data):
- """ Pass additional data in a URL for internal use. """
-
- url, idata = unsmuggle_url(url, {})
- data.update(idata)
- sdata = compat_urllib_parse_urlencode(
- {'__youtubedl_smuggle': json.dumps(data)})
- return url + '#' + sdata
-
-
-def unsmuggle_url(smug_url, default=None):
- if '#__youtubedl_smuggle' not in smug_url:
- return smug_url, default
- url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
- data = json.loads(jsond)
- return url, data
-
-
-def format_bytes(bytes):
- if bytes is None:
- return 'N/A'
- if type(bytes) is str:
- bytes = float(bytes)
- if bytes == 0.0:
- exponent = 0
- else:
- exponent = int(math.log(bytes, 1024.0))
- suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
- converted = float(bytes) / float(1024 ** exponent)
- return '%.2f%s' % (converted, suffix)
-
-
-def lookup_unit_table(unit_table, s):
- units_re = '|'.join(re.escape(u) for u in unit_table)
- m = re.match(
- r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
- if not m:
- return None
- num_str = m.group('num').replace(',', '.')
- mult = unit_table[m.group('unit')]
- return int(float(num_str) * mult)
-
-
-def parse_filesize(s):
- if s is None:
- return None
-
- # The lower-case forms are of course incorrect and unofficial,
- # but we support those too
- _UNIT_TABLE = {
- 'B': 1,
- 'b': 1,
- 'bytes': 1,
- 'KiB': 1024,
- 'KB': 1000,
- 'kB': 1024,
- 'Kb': 1000,
- 'kb': 1000,
- 'kilobytes': 1000,
- 'kibibytes': 1024,
- 'MiB': 1024 ** 2,
- 'MB': 1000 ** 2,
- 'mB': 1024 ** 2,
- 'Mb': 1000 ** 2,
- 'mb': 1000 ** 2,
- 'megabytes': 1000 ** 2,
- 'mebibytes': 1024 ** 2,
- 'GiB': 1024 ** 3,
- 'GB': 1000 ** 3,
- 'gB': 1024 ** 3,
- 'Gb': 1000 ** 3,
- 'gb': 1000 ** 3,
- 'gigabytes': 1000 ** 3,
- 'gibibytes': 1024 ** 3,
- 'TiB': 1024 ** 4,
- 'TB': 1000 ** 4,
- 'tB': 1024 ** 4,
- 'Tb': 1000 ** 4,
- 'tb': 1000 ** 4,
- 'terabytes': 1000 ** 4,
- 'tebibytes': 1024 ** 4,
- 'PiB': 1024 ** 5,
- 'PB': 1000 ** 5,
- 'pB': 1024 ** 5,
- 'Pb': 1000 ** 5,
- 'pb': 1000 ** 5,
- 'petabytes': 1000 ** 5,
- 'pebibytes': 1024 ** 5,
- 'EiB': 1024 ** 6,
- 'EB': 1000 ** 6,
- 'eB': 1024 ** 6,
- 'Eb': 1000 ** 6,
- 'eb': 1000 ** 6,
- 'exabytes': 1000 ** 6,
- 'exbibytes': 1024 ** 6,
- 'ZiB': 1024 ** 7,
- 'ZB': 1000 ** 7,
- 'zB': 1024 ** 7,
- 'Zb': 1000 ** 7,
- 'zb': 1000 ** 7,
- 'zettabytes': 1000 ** 7,
- 'zebibytes': 1024 ** 7,
- 'YiB': 1024 ** 8,
- 'YB': 1000 ** 8,
- 'yB': 1024 ** 8,
- 'Yb': 1000 ** 8,
- 'yb': 1000 ** 8,
- 'yottabytes': 1000 ** 8,
- 'yobibytes': 1024 ** 8,
- }
-
- return lookup_unit_table(_UNIT_TABLE, s)
-
-
-def parse_count(s):
- if s is None:
- return None
-
- s = s.strip()
-
- if re.match(r'^[\d,.]+$', s):
- return str_to_int(s)
-
- _UNIT_TABLE = {
- 'k': 1000,
- 'K': 1000,
- 'm': 1000 ** 2,
- 'M': 1000 ** 2,
- 'kk': 1000 ** 2,
- 'KK': 1000 ** 2,
- }
-
- return lookup_unit_table(_UNIT_TABLE, s)
-
-
-def parse_resolution(s):
- if s is None:
- return {}
-
- mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
- if mobj:
- return {
- 'width': int(mobj.group('w')),
- 'height': int(mobj.group('h')),
- }
-
- mobj = re.search(r'\b(\d+)[pPiI]\b', s)
- if mobj:
- return {'height': int(mobj.group(1))}
-
- mobj = re.search(r'\b([48])[kK]\b', s)
- if mobj:
- return {'height': int(mobj.group(1)) * 540}
-
- return {}
-
-
-def month_by_name(name, lang='en'):
- """ Return the number of a month by (locale-independently) English name """
-
- month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
-
- try:
- return month_names.index(name) + 1
- except ValueError:
- return None
-
-
-def month_by_abbreviation(abbrev):
- """ Return the number of a month by (locale-independently) English
- abbreviations """
-
- try:
- return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
- except ValueError:
- return None
-
-
-def fix_xml_ampersands(xml_str):
- """Replace all the '&' by '&amp;' in XML"""
- return re.sub(
- r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
- '&amp;',
- xml_str)
-
-
-def setproctitle(title):
- assert isinstance(title, compat_str)
-
- # ctypes in Jython is not complete
- # http://bugs.jython.org/issue2148
- if sys.platform.startswith('java'):
- return
-
- try:
- libc = ctypes.cdll.LoadLibrary('libc.so.6')
- except OSError:
- return
- except TypeError:
- # LoadLibrary in Windows Python 2.7.13 only expects
- # a bytestring, but since unicode_literals turns
- # every string into a unicode string, it fails.
- return
- title_bytes = title.encode('utf-8')
- buf = ctypes.create_string_buffer(len(title_bytes))
- buf.value = title_bytes
- try:
- libc.prctl(15, buf, 0, 0, 0)
- except AttributeError:
- return # Strange libc, just skip this
-
-
-def remove_start(s, start):
- return s[len(start):] if s is not None and s.startswith(start) else s
-
-
-def remove_end(s, end):
- return s[:-len(end)] if s is not None and s.endswith(end) else s
-
-
-def remove_quotes(s):
- if s is None or len(s) < 2:
- return s
- for quote in ('"', "'", ):
- if s[0] == quote and s[-1] == quote:
- return s[1:-1]
- return s
-
-
-def url_basename(url):
- path = compat_urlparse.urlparse(url).path
- return path.strip('/').split('/')[-1]
-
-
-def base_url(url):
- return re.match(r'https?://[^?#&]+/', url).group()
-
-
-def urljoin(base, path):
- if isinstance(path, bytes):
- path = path.decode('utf-8')
- if not isinstance(path, compat_str) or not path:
- return None
- if re.match(r'^(?:https?:)?//', path):
- return path
- if isinstance(base, bytes):
- base = base.decode('utf-8')
- if not isinstance(base, compat_str) or not re.match(
- r'^(?:https?:)?//', base):
- return None
- return compat_urlparse.urljoin(base, path)
-
-
-class HEADRequest(compat_urllib_request.Request):
- def get_method(self):
- return 'HEAD'
-
-
-class PUTRequest(compat_urllib_request.Request):
- def get_method(self):
- return 'PUT'
-
-
-def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
- if get_attr:
- if v is not None:
- v = getattr(v, get_attr, None)
- if v == '':
- v = None
- if v is None:
- return default
- try:
- return int(v) * invscale // scale
- except ValueError:
- return default
-
-
-def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
-
-
-def str_to_int(int_str):
- """ A more relaxed version of int_or_none """
- if int_str is None:
- return None
- int_str = re.sub(r'[,\.\+]', '', int_str)
- return int(int_str)
-
-
-def float_or_none(v, scale=1, invscale=1, default=None):
- if v is None:
- return default
- try:
- return float(v) * invscale / scale
- except ValueError:
- return default
-
-
-def bool_or_none(v, default=None):
- return v if isinstance(v, bool) else default
-
-
-def strip_or_none(v):
- return None if v is None else v.strip()
-
-
-def url_or_none(url):
- if not url or not isinstance(url, compat_str):
- return None
- url = url.strip()
- return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
-
-
-def parse_duration(s):
- if not isinstance(s, compat_basestring):
- return None
-
- s = s.strip()
-
- days, hours, mins, secs, ms = [None] * 5
- m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
- if m:
- days, hours, mins, secs, ms = m.groups()
- else:
- m = re.match(
- r'''(?ix)(?:P?
- (?:
- [0-9]+\s*y(?:ears?)?\s*
- )?
- (?:
- [0-9]+\s*m(?:onths?)?\s*
- )?
- (?:
- [0-9]+\s*w(?:eeks?)?\s*
- )?
- (?:
- (?P<days>[0-9]+)\s*d(?:ays?)?\s*
- )?
- T)?
- (?:
- (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
- )?
- (?:
- (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
- )?
- (?:
- (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
- )?Z?$''', s)
- if m:
- days, hours, mins, secs, ms = m.groups()
- else:
- m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
- if m:
- hours, mins = m.groups()
- else:
- return None
-
- duration = 0
- if secs:
- duration += float(secs)
- if mins:
- duration += float(mins) * 60
- if hours:
- duration += float(hours) * 60 * 60
- if days:
- duration += float(days) * 24 * 60 * 60
- if ms:
- duration += float(ms)
- return duration
-
-
-def prepend_extension(filename, ext, expected_real_ext=None):
- name, real_ext = os.path.splitext(filename)
- return (
- '{0}.{1}{2}'.format(name, ext, real_ext)
- if not expected_real_ext or real_ext[1:] == expected_real_ext
- else '{0}.{1}'.format(filename, ext))
-
-
-def replace_extension(filename, ext, expected_real_ext=None):
- name, real_ext = os.path.splitext(filename)
- return '{0}.{1}'.format(
- name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
- ext)
-
-
-def check_executable(exe, args=[]):
- """ Checks if the given binary is installed somewhere in PATH, and returns its name.
- args can be a list of arguments for a short output (like -version) """
- try:
- subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
- except OSError:
- return False
- return exe
-
-
-def get_exe_version(exe, args=['--version'],
- version_re=None, unrecognized='present'):
- """ Returns the version of the specified executable,
- or False if the executable is not present """
- try:
- # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
- # SIGTTOU if youtube-dl is run in the background.
- # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
- out, _ = subprocess.Popen(
- [encodeArgument(exe)] + args,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
- except OSError:
- return False
- if isinstance(out, bytes): # Python 2.x
- out = out.decode('ascii', 'ignore')
- return detect_exe_version(out, version_re, unrecognized)
-
-
-def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
- if version_re is None:
- version_re = r'version\s+([-0-9._a-zA-Z]+)'
- m = re.search(version_re, output)
- if m:
- return m.group(1)
- else:
- return unrecognized
-
-
-class PagedList(object):
- def __len__(self):
- # This is only useful for tests
- return len(self.getslice())
-
-
-class OnDemandPagedList(PagedList):
- def __init__(self, pagefunc, pagesize, use_cache=True):
- self._pagefunc = pagefunc
- self._pagesize = pagesize
- self._use_cache = use_cache
- if use_cache:
- self._cache = {}
-
- def getslice(self, start=0, end=None):
- res = []
- for pagenum in itertools.count(start // self._pagesize):
- firstid = pagenum * self._pagesize
- nextfirstid = pagenum * self._pagesize + self._pagesize
- if start >= nextfirstid:
- continue
-
- page_results = None
- if self._use_cache:
- page_results = self._cache.get(pagenum)
- if page_results is None:
- page_results = list(self._pagefunc(pagenum))
- if self._use_cache:
- self._cache[pagenum] = page_results
-
- startv = (
- start % self._pagesize
- if firstid <= start < nextfirstid
- else 0)
-
- endv = (
- ((end - 1) % self._pagesize) + 1
- if (end is not None and firstid <= end <= nextfirstid)
- else None)
-
- if startv != 0 or endv is not None:
- page_results = page_results[startv:endv]
- res.extend(page_results)
-
- # A little optimization - if current page is not "full", ie. does
- # not contain page_size videos then we can assume that this page
- # is the last one - there are no more ids on further pages -
- # i.e. no need to query again.
- if len(page_results) + startv < self._pagesize:
- break
-
- # If we got the whole page, but the next page is not interesting,
- # break out early as well
- if end == nextfirstid:
- break
- return res
-
-
-class InAdvancePagedList(PagedList):
- def __init__(self, pagefunc, pagecount, pagesize):
- self._pagefunc = pagefunc
- self._pagecount = pagecount
- self._pagesize = pagesize
-
- def getslice(self, start=0, end=None):
- res = []
- start_page = start // self._pagesize
- end_page = (
- self._pagecount if end is None else (end // self._pagesize + 1))
- skip_elems = start - start_page * self._pagesize
- only_more = None if end is None else end - start
- for pagenum in range(start_page, end_page):
- page = list(self._pagefunc(pagenum))
- if skip_elems:
- page = page[skip_elems:]
- skip_elems = None
- if only_more is not None:
- if len(page) < only_more:
- only_more -= len(page)
- else:
- page = page[:only_more]
- res.extend(page)
- break
- res.extend(page)
- return res
-
-
-def uppercase_escape(s):
- unicode_escape = codecs.getdecoder('unicode_escape')
- return re.sub(
- r'\\U[0-9a-fA-F]{8}',
- lambda m: unicode_escape(m.group(0))[0],
- s)
-
-
-def lowercase_escape(s):
- unicode_escape = codecs.getdecoder('unicode_escape')
- return re.sub(
- r'\\u[0-9a-fA-F]{4}',
- lambda m: unicode_escape(m.group(0))[0],
- s)
-
-
-def escape_rfc3986(s):
- """Escape non-ASCII characters as suggested by RFC 3986"""
- if sys.version_info < (3, 0) and isinstance(s, compat_str):
- s = s.encode('utf-8')
- return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
-
-
-def escape_url(url):
- """Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
- return url_parsed._replace(
- netloc=url_parsed.netloc.encode('idna').decode('ascii'),
- path=escape_rfc3986(url_parsed.path),
- params=escape_rfc3986(url_parsed.params),
- query=escape_rfc3986(url_parsed.query),
- fragment=escape_rfc3986(url_parsed.fragment)
- ).geturl()
-
-
-def read_batch_urls(batch_fd):
- def fixup(url):
- if not isinstance(url, compat_str):
- url = url.decode('utf-8', 'replace')
- BOM_UTF8 = '\xef\xbb\xbf'
- if url.startswith(BOM_UTF8):
- url = url[len(BOM_UTF8):]
- url = url.strip()
- if url.startswith(('#', ';', ']')):
- return False
- return url
-
- with contextlib.closing(batch_fd) as fd:
- return [url for url in map(fixup, fd) if url]
-
-
-def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
-
-
-def update_url_query(url, query):
- if not query:
- return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
- qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
-
-
-def update_Request(req, url=None, data=None, headers={}, query={}):
- req_headers = req.headers.copy()
- req_headers.update(headers)
- req_data = data or req.data
- req_url = update_url_query(url or req.get_full_url(), query)
- req_get_method = req.get_method()
- if req_get_method == 'HEAD':
- req_type = HEADRequest
- elif req_get_method == 'PUT':
- req_type = PUTRequest
- else:
- req_type = compat_urllib_request.Request
- new_req = req_type(
- req_url, data=req_data, headers=req_headers,
- origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
- if hasattr(req, 'timeout'):
- new_req.timeout = req.timeout
- return new_req
-
-
-def _multipart_encode_impl(data, boundary):
- content_type = 'multipart/form-data; boundary=%s' % boundary
-
- out = b''
- for k, v in data.items():
- out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
- k = k.encode('utf-8')
- if isinstance(v, compat_str):
- v = v.encode('utf-8')
- # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
- # suggests sending UTF-8 directly. Firefox sends UTF-8, too
- content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
- if boundary.encode('ascii') in content:
- raise ValueError('Boundary overlaps with data')
- out += content
-
- out += b'--' + boundary.encode('ascii') + b'--\r\n'
-
- return out, content_type
-
-
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body, content_type) tuple. With a caller-supplied boundary a
    ValueError from the encoder (boundary found in the data) propagates;
    with a generated boundary a new one is drawn until encoding succeeds.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: no retry, errors go straight to the caller
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # Clash with the payload - draw another random boundary
            continue
-
-
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of several, in ``d``.

    With a single key this is plain ``d.get(key, default)``. With a list or
    tuple of keys, the value of the first key that is present and not None
    is returned; falsy values are skipped as well unless
    ``skip_false_values`` is False. ``default`` is returned when no key
    qualifies.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)

    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
-
-
def try_get(src, getter, expected_type=None):
    """Apply getter callable(s) to ``src`` and return the first usable result.

    ``getter`` is a callable or a list/tuple of callables, tried in order.
    A getter that raises AttributeError, KeyError, TypeError or IndexError
    is skipped, as is a result that is not an instance of ``expected_type``
    (when one is given). Returns None if no getter yields a usable value.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fetch in getters:
        try:
            value = fetch(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
-
-
def merge_dicts(*dicts):
    """Merge dicts left to right, with earlier values taking precedence.

    None values are ignored. A key that is already set is only overwritten
    when the stored value is an empty string and the new value is a
    non-empty string.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                fills_blank_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key])
                if not fills_blank_string:
                    continue
            merged[key] = value
    return merged
-
-
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return ``string`` as a compat_str.

    Values that already are compat_str pass through untouched; anything else
    is converted with ``compat_str(string, encoding, errors)``. The default
    encoding is whatever preferredencoding() returned when this function was
    defined.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
-
-
# Maps US rating labels to the age limit parse_age_limit() returns for them.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Maps 'TV-*' rating labels to the age limit parse_age_limit() returns for
# them. parse_age_limit() strips the first three characters of each key to
# build its regex, so every key must keep the 'TV-' prefix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
-
-
def parse_age_limit(s):
    """Convert an age rating to an integer age limit, or None if unknown.

    Accepted inputs:
      * an int between 0 and 21 inclusive (returned as is)
      * a string of one or two digits with an optional trailing '+'
      * a key of US_RATINGS
      * a TV_PARENTAL_GUIDELINES rating written as 'TV-XX', 'TV_XX' or 'TVXX'
    """
    # Exact type check: a bool is not an int here and ends up returning None
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    # Keys all start with 'TV-'; match on the part after that prefix
    tv_suffixes = '|'.join(label[3:] for label in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
-
-
def strip_jsonp(code):
    """Unwrap a JSONP response and return the callback's argument text.

    Handles an optional ``window.`` prefix, an optional ``name && name``
    guard, an optional trailing semicolon and trailing ``//`` comments.
    Input that does not look like a JSONP call is returned unchanged.
    """
    jsonp_rex = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_rex.sub(r'\g<callback_data>', code)
-
-
def js_to_json(code):
    """Rewrite a JavaScript object/array literal into JSON text.

    A single regex pass tokenises ``code`` into string literals, comments,
    trailing commas, bare identifiers and numeric literals; every token is
    rewritten by ``fix_kv`` and everything else is left in place.
    """
    # /* ... */ block comments and // line comments
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace with at most one comment in between
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for hexadecimal and octal integer literals,
    # optionally followed by ':' when the literal is used as an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Replacement callback: returns the JSON spelling of one matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Strip the surrounding quotes and normalise the escapes inside:
            # escape bare double quotes, unescape \', remove escaped line
            # breaks and turn \xNN into \u00NN
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # Object keys are emitted as quoted decimal strings
                return '"%d":' % i if v.endswith(':') else '%d' % i

        # Everything else is emitted as a double-quoted string
        return '"%s"' % v

    # Alternatives, in order: double-quoted string, single-quoted string,
    # comment, comma directly before a closing ] or }, identifier,
    # hex/octal integer (optionally an object key), decimal object key
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
-
-
def qualities(quality_ids):
    """ Build a function ranking a quality id by its position in quality_ids

    The returned function gives the index of the id within quality_ids, or
    -1 for an id that is not listed.
    """
    def rank(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return rank
-
-
# %-style template built from the 'title', 'id' and 'ext' fields; presumably
# the default output filename template - confirm against the callers.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
-
-
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Returns None for None. A string of at most `length` characters is
    returned unchanged; a longer one is cut so that the result, including
    the trailing '...', is exactly `length` characters long.

    When `length` is too small to hold the ellipsis at all, the string is
    simply truncated to `length` characters (a negative `length` counts as
    0), so the result never exceeds the requested length.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length < len(ELLIPSES):
        # A negative slice bound would count from the end of the string and
        # produce a result longer than requested
        return s[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
-
-
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError if any component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
-
-
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether ``version`` is older than ``limit``.

    A missing version, or one that version_tuple cannot parse, counts as
    new when ``assume_new`` is true and as outdated otherwise.
    """
    verdict_when_unknown = not assume_new
    if not version:
        return verdict_when_unknown
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return verdict_when_unknown
-
-
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    True when this module was loaded by a zipimporter, or when ``sys`` has
    a ``frozen`` attribute.
    """
    from zipimport import zipimporter

    module_loader = globals().get('__loader__')
    if isinstance(module_loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
-
-
def args_to_str(args):
    """Get a short string representation for a subprocess command.

    Every argument is quoted with compat_shlex_quote and the results are
    joined with single spaces.
    """
    quoted_args = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
-
-
def error_to_compat_str(err):
    """Return the message of ``err`` as text."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
-
-
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Parameters after ';' are ignored and the type is compared
    case-insensitively. A few full types are special-cased; otherwise the
    subtype (the part after the last '/') is translated through a table of
    known subtypes and returned as is when it is not listed. Returns None
    for None.
    """
    if mt is None:
        return None

    # Strip parameters and normalise case before any lookup, so that e.g.
    # 'audio/mp4; codecs="mp4a.40.2"' still hits the full-type table
    mime_type = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mime_type)
    if ext is not None:
        return ext

    _, _, res = mime_type.rpartition('/')
    res = res.strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
-
-
def parse_codecs(codecs_str):
    """Split an RFC 6381 ``codecs`` value into video and audio codecs.

    Returns a dict with 'vcodec' and 'acodec' keys, or {} when nothing
    usable can be derived. The first recognised video codec and the first
    recognised audio codec win; a missing one is reported as 'none'. A
    warning is written to stderr for every unrecognised codec.

    When no codec is recognised at all, two entries are taken to be
    (video, audio) and a single entry is taken to be audio.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing was recognised, so vcodec/acodec are both None here; fall back
    # to the position of the entries in the original string instead
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    if len(split_codecs) == 1:
        return {
            'vcodec': 'none',
            'acodec': split_codecs[0],
        }
    return {}
-
-
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of an opened URL handle.

    The filename of a ``Content-Disposition: attachment`` header is
    preferred; if that gives no extension, the Content-Type header is
    translated with mimetype2ext.
    """
    response_headers = url_handle.headers

    disposition = response_headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(response_headers.get('Content-Type'))
-
-
def encode_data_uri(data, mime_type):
    """Build a base64 ``data:`` URI from bytes ``data`` and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
-
-
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age limit is set or when the content
    carries no limit of its own.
    """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
-
-
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    when there is none) and the result is the regex match object for
    optional whitespace followed by '<', or None.
    """
    # Order matters: the UTF-32 LE mark starts with the UTF-16 LE mark
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    detected = next(
        ((mark, codec) for mark, codec in BOMS if first_bytes.startswith(mark)),
        None)
    if detected is None:
        text = first_bytes.decode('utf-8', 'replace')
    else:
        mark, codec = detected
        text = first_bytes[len(mark):].decode(codec, 'replace')

    return re.match(r'^\s*<', text)
-
-
def determine_protocol(info_dict):
    """Work out the download protocol for a format/info dict.

    An explicit 'protocol' entry wins. Otherwise the URL prefix is checked
    for rtmp/mms/rtsp, then the extension for m3u8/f4m, and finally the URL
    scheme is returned.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    for manifest_ext in ('m3u8', 'f4m'):
        if ext == manifest_ext:
            return manifest_ext

    return compat_urllib_parse_urlparse(url).scheme
-
-
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values

    Every column but the last is left-aligned and padded to one character
    more than its widest cell; columns are separated by a single space and
    rows by newlines. The header row is rendered like any other row.
    """
    rows = [header_row] + data
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_specs) + '%s'
    rendered_rows = [row_format % tuple(row) for row in rows]
    return '\n'.join(rendered_rows)
-
-
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dict ``dct``.

    Two forms are recognised:
      * ``key OP value`` with OP one of <, <=, >, >=, =, != . A '?' right
        after the operator makes the part pass when the key is missing or
        None.
      * ``key`` or ``!key`` (unary test on the field).

    Returns a truthy value when the part passes. Raises ValueError for an
    unparsable part, an invalid number, or an ordering operator applied to
    a string value.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key, operator, optional '?', then one of: a number with an optional
    # size suffix, a quoted string, or a bare alphanumeric string
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None or
            m.group('strval') is not None or
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/rg3/youtube-dl/issues/11082).
            actual_value is not None and m.group('intval') is not None and
                isinstance(actual_value, compat_str)):
            # String comparison: only equality and inequality are allowed
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then a file size with
            # a suffix, then the same with 'B' appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only if '?' followed the operator; the
            # result is the matched '?' text (truthy) or None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: for bool fields test the value itself, for anything else
    # test presence ('key') or absence ('!key')
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
-
-
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    The filter string is split on '&' and every part has to pass
    _match_one; evaluation stops at the first part that fails.
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
-
-
def match_filter_func(filter_str):
    """Wrap ``filter_str`` into a callable taking an info dict.

    The callable returns None when the info dict passes the filter and a
    human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Fall back to the id, then to a generic word, for the message
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
-
-
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Understands a plain offset in seconds with an optional 's' suffix
    ('12.5s') and clock times 'H:MM:SS', optionally followed by a fraction
    or frame part separated by '.' or ':'. Returns None for empty or
    unrecognised input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if not clock:
        return None
    hours, minutes, seconds = clock.groups()
    # A ':' before the last component is treated like a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
-
-
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to the nearest millisecond before it is split
    into fields. Truncating the float remainder instead would turn e.g.
    4.6 seconds into ',599' because of binary floating point representation.
    """
    total_msec = int(round(seconds * 1000))
    total_sec, msec = divmod(total_msec, 1000)
    total_min, sec = divmod(total_sec, 60)
    hours, minutes = divmod(total_min, 60)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, sec, msec)
-
-
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Supported tts:* styling (see SUPPORTED_STYLING) is turned into
    <font>, <b>, <i> and <u> tags; paragraphs without a begin time, or
    without both an end time and a duration, are left out.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # Outdated namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved style properties, filled in below
    styles = {}
    # style inherited from <body>/<div>, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target rendering one paragraph to SRT markup

        def __init__(self):
            # All state is per instance: a fresh parser is created for every
            # paragraph and nothing may carry over from the previous one
            self._out = ''
            # One list of opened SRT tags per currently open element
            self._unclosed_elements = []
            # One effective style dict per currently open element
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Already in effect through an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                # start() pushes exactly one style entry per styled element,
                # whether or not any tag was emitted for it, so the matching
                # pop must not depend on emitted tags either
                if self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the paragraph and feed it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; another pass is made whenever a style
    # referred to a parent that had not been resolved yet
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The counter advances for skipped paragraphs too
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
-
-
def cli_option(params, command_option, param):
    """Build ``[command_option, value]`` from ``params[param]``.

    Returns an empty list when the parameter is missing or None. Truthy
    values are converted with compat_str; falsy non-None values are passed
    on as they are.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
-
-
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean parameter as command line argument(s).

    Returns [] when the parameter is missing or None. Otherwise the result
    is ``[option, value]`` or, when a separator is given, the single
    argument ``[option + separator + value]``, where value is
    ``true_value`` or ``false_value``. The parameter has to be a bool.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
-
-
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` if ``params[param]`` equals
    ``expected_value``, else an empty list."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
-
-
def cli_configuration_args(params, param, default=None):
    """Return the list of extra command line arguments stored in ``params``.

    params:  dict of options
    param:   key holding the argument list
    default: returned when the key is missing or None; a new empty list is
             returned when no default is given, so callers never share (and
             cannot accidentally mutate) one common default list

    The stored value has to be a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
-
-
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Entry order matters: long2short returns the first key whose value
    # matches, so the current code comes before its withdrawn alias.
    _lang_map = {
        'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka', 'am': 'amh',
        'an': 'arg', 'ar': 'ara', 'as': 'asm', 'av': 'ava', 'ay': 'aym', 'az': 'aze',
        'ba': 'bak', 'be': 'bel', 'bg': 'bul', 'bh': 'bih', 'bi': 'bis', 'bm': 'bam',
        'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos',
        'ca': 'cat', 'ce': 'che', 'ch': 'cha', 'co': 'cos', 'cr': 'cre', 'cs': 'ces',
        'cu': 'chu', 'cv': 'chv', 'cy': 'cym',
        'da': 'dan', 'de': 'deu', 'dv': 'div', 'dz': 'dzo',
        'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo', 'es': 'spa', 'et': 'est',
        'eu': 'eus',
        'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao', 'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle', 'gd': 'gla', 'gl': 'glg', 'gn': 'grn', 'gu': 'guj', 'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', 'ht': 'hat', 'hu': 'hun', 'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', 'ik': 'ipk', 'io': 'ido', 'is': 'isl',
        'it': 'ita', 'iu': 'iku',
        'ja': 'jpn', 'jv': 'jav',
        'ka': 'kat', 'kg': 'kon', 'ki': 'kik', 'kj': 'kua', 'kk': 'kaz', 'kl': 'kal',
        'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau', 'ks': 'kas', 'ku': 'kur',
        'kv': 'kom', 'kw': 'cor', 'ky': 'kir',
        'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin', 'lo': 'lao',
        'lt': 'lit', 'lu': 'lub', 'lv': 'lav',
        'mg': 'mlg', 'mh': 'mah', 'mi': 'mri', 'mk': 'mkd', 'ml': 'mal', 'mn': 'mon',
        'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya',
        'na': 'nau', 'nb': 'nob', 'nd': 'nde', 'ne': 'nep', 'ng': 'ndo', 'nl': 'nld',
        'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav', 'ny': 'nya',
        'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss',
        'pa': 'pan', 'pi': 'pli', 'pl': 'pol', 'ps': 'pus', 'pt': 'por',
        'qu': 'que',
        'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rw': 'kin',
        'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag', 'si': 'sin',
        'sk': 'slk', 'sl': 'slv', 'sm': 'smo', 'sn': 'sna', 'so': 'som', 'sq': 'sqi',
        'sr': 'srp', 'ss': 'ssw', 'st': 'sot', 'su': 'sun', 'sv': 'swe', 'sw': 'swa',
        'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir', 'tk': 'tuk',
        'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur', 'ts': 'tso', 'tt': 'tat',
        'tw': 'twi', 'ty': 'tah',
        'ug': 'uig', 'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb',
        've': 'ven', 'vi': 'vie', 'vo': 'vol',
        'wa': 'wln', 'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha', 'zh': 'zho', 'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of ``code`` are looked up. Returns
        None for an unknown code.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code mapped to ``code``, or None when
        there is none.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
-
-
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the
        corresponding full name

        The lookup ignores the case of ``code``; None is returned for an
        unknown code.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
-
-
-class GeoUtils(object):
- # Major IPv4 address blocks per country
- _country_ip_map = {
- 'AD': '85.94.160.0/19',
- 'AE': '94.200.0.0/13',
- 'AF': '149.54.0.0/17',
- 'AG': '209.59.64.0/18',
- 'AI': '204.14.248.0/21',
- 'AL': '46.99.0.0/16',
- 'AM': '46.70.0.0/15',
- 'AO': '105.168.0.0/13',
- 'AP': '159.117.192.0/21',
- 'AR': '181.0.0.0/12',
- 'AS': '202.70.112.0/20',
- 'AT': '84.112.0.0/13',
- 'AU': '1.128.0.0/11',
- 'AW': '181.41.0.0/18',
- 'AZ': '5.191.0.0/16',
- 'BA': '31.176.128.0/17',
- 'BB': '65.48.128.0/17',
- 'BD': '114.130.0.0/16',
- 'BE': '57.0.0.0/8',
- 'BF': '129.45.128.0/17',
- 'BG': '95.42.0.0/15',
- 'BH': '37.131.0.0/17',
- 'BI': '154.117.192.0/18',
- 'BJ': '137.255.0.0/16',
- 'BL': '192.131.134.0/24',
- 'BM': '196.12.64.0/18',
- 'BN': '156.31.0.0/16',
- 'BO': '161.56.0.0/16',
- 'BQ': '161.0.80.0/20',
- 'BR': '152.240.0.0/12',
- 'BS': '24.51.64.0/18',
- 'BT': '119.2.96.0/19',
- 'BW': '168.167.0.0/16',
- 'BY': '178.120.0.0/13',
- 'BZ': '179.42.192.0/18',
- 'CA': '99.224.0.0/11',
- 'CD': '41.243.0.0/16',
- 'CF': '196.32.200.0/21',
- 'CG': '197.214.128.0/17',
- 'CH': '85.0.0.0/13',
- 'CI': '154.232.0.0/14',
- 'CK': '202.65.32.0/19',
- 'CL': '152.172.0.0/14',
- 'CM': '165.210.0.0/15',
- 'CN': '36.128.0.0/10',
- 'CO': '181.240.0.0/12',
- 'CR': '201.192.0.0/12',
- 'CU': '152.206.0.0/15',
- 'CV': '165.90.96.0/19',
- 'CW': '190.88.128.0/17',
- 'CY': '46.198.0.0/15',
- 'CZ': '88.100.0.0/14',
- 'DE': '53.0.0.0/8',
- 'DJ': '197.241.0.0/17',
- 'DK': '87.48.0.0/12',
- 'DM': '192.243.48.0/20',
- 'DO': '152.166.0.0/15',
- 'DZ': '41.96.0.0/12',
- 'EC': '186.68.0.0/15',
- 'EE': '90.190.0.0/15',
- 'EG': '156.160.0.0/11',
- 'ER': '196.200.96.0/20',
- 'ES': '88.0.0.0/11',
- 'ET': '196.188.0.0/14',
- 'EU': '2.16.0.0/13',
- 'FI': '91.152.0.0/13',
- 'FJ': '144.120.0.0/16',
- 'FM': '119.252.112.0/20',
- 'FO': '88.85.32.0/19',
- 'FR': '90.0.0.0/9',
- 'GA': '41.158.0.0/15',
- 'GB': '25.0.0.0/8',
- 'GD': '74.122.88.0/21',
- 'GE': '31.146.0.0/16',
- 'GF': '161.22.64.0/18',
- 'GG': '62.68.160.0/19',
- 'GH': '45.208.0.0/14',
- 'GI': '85.115.128.0/19',
- 'GL': '88.83.0.0/19',
- 'GM': '160.182.0.0/15',
- 'GN': '197.149.192.0/18',
- 'GP': '104.250.0.0/19',
- 'GQ': '105.235.224.0/20',
- 'GR': '94.64.0.0/13',
- 'GT': '168.234.0.0/16',
- 'GU': '168.123.0.0/16',
- 'GW': '197.214.80.0/20',
- 'GY': '181.41.64.0/18',
- 'HK': '113.252.0.0/14',
- 'HN': '181.210.0.0/16',
- 'HR': '93.136.0.0/13',
- 'HT': '148.102.128.0/17',
- 'HU': '84.0.0.0/14',
- 'ID': '39.192.0.0/10',
- 'IE': '87.32.0.0/12',
- 'IL': '79.176.0.0/13',
- 'IM': '5.62.80.0/20',
- 'IN': '117.192.0.0/10',
- 'IO': '203.83.48.0/21',
- 'IQ': '37.236.0.0/14',
- 'IR': '2.176.0.0/12',
- 'IS': '82.221.0.0/16',
- 'IT': '79.0.0.0/10',
- 'JE': '87.244.64.0/18',
- 'JM': '72.27.0.0/17',
- 'JO': '176.29.0.0/16',
- 'JP': '126.0.0.0/8',
- 'KE': '105.48.0.0/12',
- 'KG': '158.181.128.0/17',
- 'KH': '36.37.128.0/17',
- 'KI': '103.25.140.0/22',
- 'KM': '197.255.224.0/20',
- 'KN': '198.32.32.0/19',
- 'KP': '175.45.176.0/22',
- 'KR': '175.192.0.0/10',
- 'KW': '37.36.0.0/14',
- 'KY': '64.96.0.0/15',
- 'KZ': '2.72.0.0/13',
- 'LA': '115.84.64.0/18',
- 'LB': '178.135.0.0/16',
- 'LC': '192.147.231.0/24',
- 'LI': '82.117.0.0/19',
- 'LK': '112.134.0.0/15',
- 'LR': '41.86.0.0/19',
- 'LS': '129.232.0.0/17',
- 'LT': '78.56.0.0/13',
- 'LU': '188.42.0.0/16',
- 'LV': '46.109.0.0/16',
- 'LY': '41.252.0.0/14',
- 'MA': '105.128.0.0/11',
- 'MC': '88.209.64.0/18',
- 'MD': '37.246.0.0/16',
- 'ME': '178.175.0.0/17',
- 'MF': '74.112.232.0/21',
- 'MG': '154.126.0.0/17',
- 'MH': '117.103.88.0/21',
- 'MK': '77.28.0.0/15',
- 'ML': '154.118.128.0/18',
- 'MM': '37.111.0.0/17',
- 'MN': '49.0.128.0/17',
- 'MO': '60.246.0.0/16',
- 'MP': '202.88.64.0/20',
- 'MQ': '109.203.224.0/19',
- 'MR': '41.188.64.0/18',
- 'MS': '208.90.112.0/22',
- 'MT': '46.11.0.0/16',
- 'MU': '105.16.0.0/12',
- 'MV': '27.114.128.0/18',
- 'MW': '105.234.0.0/16',
- 'MX': '187.192.0.0/11',
- 'MY': '175.136.0.0/13',
- 'MZ': '197.218.0.0/15',
- 'NA': '41.182.0.0/16',
- 'NC': '101.101.0.0/18',
- 'NE': '197.214.0.0/18',
- 'NF': '203.17.240.0/22',
- 'NG': '105.112.0.0/12',
- 'NI': '186.76.0.0/15',
- 'NL': '145.96.0.0/11',
- 'NO': '84.208.0.0/13',
- 'NP': '36.252.0.0/15',
- 'NR': '203.98.224.0/19',
- 'NU': '49.156.48.0/22',
- 'NZ': '49.224.0.0/14',
- 'OM': '5.36.0.0/15',
- 'PA': '186.72.0.0/15',
- 'PE': '186.160.0.0/14',
- 'PF': '123.50.64.0/18',
- 'PG': '124.240.192.0/19',
- 'PH': '49.144.0.0/13',
- 'PK': '39.32.0.0/11',
- 'PL': '83.0.0.0/11',
- 'PM': '70.36.0.0/20',
- 'PR': '66.50.0.0/16',
- 'PS': '188.161.0.0/16',
- 'PT': '85.240.0.0/13',
- 'PW': '202.124.224.0/20',
- 'PY': '181.120.0.0/14',
- 'QA': '37.210.0.0/15',
- 'RE': '139.26.0.0/16',
- 'RO': '79.112.0.0/13',
- 'RS': '178.220.0.0/14',
- 'RU': '5.136.0.0/13',
- 'RW': '105.178.0.0/15',
- 'SA': '188.48.0.0/13',
- 'SB': '202.1.160.0/19',
- 'SC': '154.192.0.0/11',
- 'SD': '154.96.0.0/13',
- 'SE': '78.64.0.0/12',
- 'SG': '152.56.0.0/14',
- 'SI': '188.196.0.0/14',
- 'SK': '78.98.0.0/15',
- 'SL': '197.215.0.0/17',
- 'SM': '89.186.32.0/19',
- 'SN': '41.82.0.0/15',
- 'SO': '197.220.64.0/19',
- 'SR': '186.179.128.0/17',
- 'SS': '105.235.208.0/21',
- 'ST': '197.159.160.0/19',
- 'SV': '168.243.0.0/16',
- 'SX': '190.102.0.0/20',
- 'SY': '5.0.0.0/16',
- 'SZ': '41.84.224.0/19',
- 'TC': '65.255.48.0/20',
- 'TD': '154.68.128.0/19',
- 'TG': '196.168.0.0/14',
- 'TH': '171.96.0.0/13',
- 'TJ': '85.9.128.0/18',
- 'TK': '27.96.24.0/21',
- 'TL': '180.189.160.0/20',
- 'TM': '95.85.96.0/19',
- 'TN': '197.0.0.0/11',
- 'TO': '175.176.144.0/21',
- 'TR': '78.160.0.0/11',
- 'TT': '186.44.0.0/15',
- 'TV': '202.2.96.0/19',
- 'TW': '120.96.0.0/11',
- 'TZ': '156.156.0.0/14',
- 'UA': '93.72.0.0/13',
- 'UG': '154.224.0.0/13',
- 'US': '3.0.0.0/8',
- 'UY': '167.56.0.0/13',
- 'UZ': '82.215.64.0/18',
- 'VA': '212.77.0.0/19',
- 'VC': '24.92.144.0/20',
- 'VE': '186.88.0.0/13',
- 'VG': '172.103.64.0/18',
- 'VI': '146.226.0.0/16',
- 'VN': '14.160.0.0/11',
- 'VU': '202.80.32.0/20',
- 'WF': '117.20.32.0/21',
- 'WS': '202.4.32.0/19',
- 'YE': '134.35.0.0/16',
- 'YT': '41.242.116.0/22',
- 'ZA': '41.0.0.0/11',
- 'ZM': '165.56.0.0/13',
- 'ZW': '41.85.192.0/19',
- }
-
@classmethod
def random_ipv4(cls, code_or_block):
    """Return a random IPv4 address string from a country or CIDR block.

    A two-character argument is treated as a country code and looked up
    (upper-cased) in cls._country_ip_map; None is returned when the map has
    no block for it. Any other argument is used directly as an
    'address/prefix-length' block.
    """
    block = code_or_block
    if len(code_or_block) == 2:
        block = cls._country_ip_map.get(code_or_block.upper())
        if not block:
            return None

    network, prefix_len = block.split('/')
    lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
    # Bits to the right of the prefix are free to vary.
    host_mask = 0xffffffff >> int(prefix_len)
    highest = lowest | host_mask
    chosen = random.randint(lowest, highest)
    return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
-
-
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that honours a per-request proxy override.

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to use
    for that request only; the header is consumed here. SOCKS proxies are not
    opened by this handler: they are recorded in a 'Ytdl-socks-proxy' header
    for later handlers to act upon.
    """

    def __init__(self, proxies=None):
        # Install default http/https openers bound to the '__noproxy__'
        # sentinel before delegating to the base initializer.
        for scheme in ('http', 'https'):
            default_opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, scheme + '_open', default_opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Pick the proxy for req and open through it when applicable.

        Returns None when no proxy applies or when a SOCKS proxy was only
        recorded on the request; otherwise returns whatever the base class
        proxy_open returns.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # The socket wrapping for SOCKS is done by youtube-dl's own
            # http/https handlers; only tag the request here.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
-
-
-# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
-# released into Public Domain
-# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
-
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Serialize an integer as a big-endian byte string.

    Leading zero bytes are removed (a value that is not positive yields a
    single zero byte). When blocksize is greater than zero, the result is
    left-padded with zero bytes up to a multiple of blocksize.
    """
    n = int(n)
    words = []
    while n > 0:
        # Collect 32-bit words, least significant first.
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    packed = b''.join(reversed(words))

    # Drop leading zeros, but never return an empty string.
    result = packed.lstrip(b'\000') or b'\000'

    if blocksize > 0 and len(result) % blocksize:
        missing = blocksize - len(result) % blocksize
        result = missing * b'\000' + result
    return result
-
-
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a big-endian byte string as an unsigned integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zeros to a whole number of 32-bit words.
    padding = -len(s) % 4
    if padding:
        s = b'\000' * padding + s

    value = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        value = (value << 32) + word
    return value
-
-
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt a single block with OHDave's RSA variant.
    See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The input bytes are read in reversed order before being turned
    # into the integer that gets exponentiated.
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    return '%x' % pow(plain_number, exponent, modulus)
-
-
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + PS + [0] + data, where PS is a run
    of random padding bytes. PS must consist of nonzero bytes only: a zero
    inside PS would be taken for the separator that precedes the data when
    the block is unpadded.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves fewer than 11 bytes of overhead
    """
    data = list(data)
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Nonzero octets only (1..255), see above.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
-
-
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer num as a string in base n.

    table supplies the digit symbols (index = digit value); when omitted or
    empty, the first n characters of 0-9a-zA-Z are used.

    Raises ValueError if n exceeds the table length, if num is negative, or
    if a non-zero num is requested in a base smaller than 2 (the latter two
    would otherwise never terminate).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never reaches zero for these inputs.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small to encode %d' % (n, num))

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first.
    return ''.join(reversed(digits))
-
-
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its unpacked form.

    The match provides the packed code, a numeric base, a symbol count and a
    '|'-separated symbol list. Every word in the packed code is replaced by
    the symbol whose index, written in that base, equals the word; an empty
    symbol stands for the word itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    packed_code, base, count, symbols = match.groups()
    base = int(base)
    remaining = int(count)
    symbols = symbols.split('|')

    replacements = {}
    while remaining != 0:
        remaining -= 1
        word = encode_base_n(remaining, base)
        replacements[word] = symbols[remaining] or word

    def substitute(word_match):
        return replacements[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, packed_code)
-
-
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=VALUE attribute list into a dict.

    Values may be bare or double-quoted; surrounding quotes are removed from
    quoted values. A later occurrence of a key overrides an earlier one.
    """
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (name, raw[1:-1] if raw.startswith('"') else raw)
        for name, raw in pairs)
-
-
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit."""
    if val < 0:
        # Reinterpret the negative value as its 32-bit unsigned counterpart.
        val += 0x100000000
    return val >> n
-
-
-# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list with one entry per image row; each row is a flat list
    of width * 3 byte values with the PNG row filters already undone.
    The decoder works with 3 bytes per pixel throughout (presumably 8-bit
    RGB, non-interlaced input -- confirm against callers).

    Raises IOError when the signature/IHDR check fails or when no IDAT data
    is present.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, body = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats_by_size = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return compat_struct_unpack(formats_by_size[len(raw)], raw)[0]

    # Split the stream into chunks: length, type, data, CRC.
    chunks = []
    while body:
        size = read_uint(body[:4])
        kind = body[4:8]
        payload = body[8:8 + size]
        body = body[8 + size + 4:]  # the trailing 4 bytes are the CRC
        chunks.append({
            'type': kind,
            'length': size,
            'data': payload
        })

    ihdr = chunks[0]['data']
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Each scanline is one filter-type byte followed by stride bytes.
        row_start = y * (1 + stride)
        filter_type = raw[row_start]

        row = []
        pixels.append(row)
        has_above = y > 0

        for x in range(stride):
            color = raw[1 + row_start + x]
            has_left = x > 2

            left = row[x - 3] if has_left else 0
            up = pixels[y - 1][x] if has_above else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = 0
                if has_left and has_above:
                    upper_left = pixels[y - 1][x - 3]

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                # Pick the neighbour closest to the estimate; ties favour
                # left, then up.
                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                color = (color + predictor) & 0xff

            row.append(color)

    return width, height, pixels
-
-
def write_xattr(path, key, value):
    """Set the extended attribute key to value on the file at path.

    Strategies, in order:
      1. the Python 'xattr' module (pyxattr flavour if it exposes 'set',
         which must be at least version 0.5.0; otherwise the xattr flavour);
      2. if that import fails and compat_os_name is 'nt', an NTFS Alternate
         Data Stream named '<path>:<key>';
      3. otherwise the 'setfattr' or 'xattr' command line tools.

    Raises XAttrUnavailableError when no usable tool is found (or pyxattr is
    too old) and XAttrMetadataError when writing the attribute fails.
    """
    try:
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        # The command line tools take the value as a text argument.
        value = value.decode('utf-8')
        if user_has_setfattr:
            executable = 'setfattr'
            opts = ['-n', key, '-v', value]
        else:
            executable = 'xattr'
            opts = ['-w', key, value]

        cmd = ([encodeFilename(executable, True)] +
               [encodeArgument(o) for o in opts] +
               [encodeFilename(path, True)])

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = p.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
-
-
def random_birthday(year_field, month_field, day_field):
    """Return a random birth date between 1950-01-01 and 1995-12-31.

    The result is a dict mapping the three given field names to the year,
    month and day of the chosen date, each as a string.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(
        (name, str(part)) for name, part in zip(field_names, date_parts))
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
deleted file mode 100644
index c7083cf..0000000
--- a/youtube_dl/version.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from __future__ import unicode_literals
-
-__version__ = '2018.07.10'