author     Jesús <heckyel@hyperbola.info>  2022-03-22 00:48:28 +0800
committer  Jesús <heckyel@hyperbola.info>  2022-03-22 00:48:28 +0800
commit     7a74bc5d1e54299e51b73492e09c70da994f4b35 (patch)
tree       e59a64b5b386d2381906e99912153aabd5d4ab0d
parent     3c69360ec3cb4a951d7e37150c7cfae8a0491cd2 (diff)
parent     84842aee2ba8dc50601c86dc6fbb12d0fa438449 (diff)
updated from upstream | 22/03/2022 at 00:48
-rw-r--r--  docs/.gitignore                          |    1
-rw-r--r--  docs/Changelog.md                        |    5
-rw-r--r--  docs/Collaborators.md                    |    5
-rw-r--r--  docs/Contributing.md                     |    5
-rw-r--r--  docs/LICENSE.md                          |    6
-rw-r--r--  docs/Makefile                            |  177
-rw-r--r--  docs/README.md                           |    2
-rw-r--r--  docs/conf.py                             |   68
-rw-r--r--  docs/requirements.txt                    |    1
-rw-r--r--  docs/supportedsites.md                   |    5
-rw-r--r--  docs/ytdlp_plugins.md                    |    6
-rw-r--r--  test/test_netrc.py                       |   13
-rw-r--r--  test/test_utils.py                       |    1
-rw-r--r--  yt_dlp/YoutubeDL.py                      |    5
-rw-r--r--  yt_dlp/downloader/fragment.py            |   46
-rw-r--r--  yt_dlp/downloader/http.py                |   65
-rw-r--r--  yt_dlp/downloader/ism.py                 |    4
-rw-r--r--  yt_dlp/downloader/mhtml.py               |    3
-rw-r--r--  yt_dlp/downloader/youtube_live_chat.py   |    6
-rw-r--r--  yt_dlp/extractor/abematv.py              |   10
-rw-r--r--  yt_dlp/extractor/adn.py                  |    5
-rw-r--r--  yt_dlp/extractor/afreecatv.py            |   14
-rw-r--r--  yt_dlp/extractor/alura.py                |    9
-rw-r--r--  yt_dlp/extractor/animelab.py             |   35
-rw-r--r--  yt_dlp/extractor/animeondemand.py        |    9
-rw-r--r--  yt_dlp/extractor/arte.py                 |    8
-rw-r--r--  yt_dlp/extractor/atresplayer.py          |    9
-rw-r--r--  yt_dlp/extractor/azmedien.py             |    3
-rw-r--r--  yt_dlp/extractor/bbc.py                  |    9
-rw-r--r--  yt_dlp/extractor/bilibili.py             |    9
-rw-r--r--  yt_dlp/extractor/canvas.py               |    9
-rw-r--r--  yt_dlp/extractor/cbs.py                  |   28
-rw-r--r--  yt_dlp/extractor/common.py               |   42
-rw-r--r--  yt_dlp/extractor/crunchyroll.py          |    8
-rw-r--r--  yt_dlp/extractor/curiositystream.py      |    7
-rw-r--r--  yt_dlp/extractor/daftsex.py              |   97
-rw-r--r--  yt_dlp/extractor/digitalconcerthall.py   |    8
-rw-r--r--  yt_dlp/extractor/eroprofile.py           |    9
-rw-r--r--  yt_dlp/extractor/extractors.py           |    1
-rw-r--r--  yt_dlp/extractor/facebook.py             |   11
-rw-r--r--  yt_dlp/extractor/fancode.py              |   38
-rw-r--r--  yt_dlp/extractor/franceculture.py        |  101
-rw-r--r--  yt_dlp/extractor/frontendmasters.py      |    9
-rw-r--r--  yt_dlp/extractor/funimation.py           |   13
-rw-r--r--  yt_dlp/extractor/gaia.py                 |   30
-rw-r--r--  yt_dlp/extractor/generic.py              |    1
-rw-r--r--  yt_dlp/extractor/hidive.py               |    8
-rw-r--r--  yt_dlp/extractor/hrti.py                 |   15
-rw-r--r--  yt_dlp/extractor/huya.py                 |  138
-rw-r--r--  yt_dlp/extractor/imggaming.py            |   13
-rw-r--r--  yt_dlp/extractor/instagram.py            |    8
-rw-r--r--  yt_dlp/extractor/iprima.py               |   16
-rw-r--r--  yt_dlp/extractor/lecturio.py             |    9
-rw-r--r--  yt_dlp/extractor/linkedin.py             |    7
-rw-r--r--  yt_dlp/extractor/linuxacademy.py         |    9
-rw-r--r--  yt_dlp/extractor/lynda.py                |   11
-rw-r--r--  yt_dlp/extractor/nebula.py               |    6
-rw-r--r--  yt_dlp/extractor/niconico.py             |  550
-rw-r--r--  yt_dlp/extractor/nitter.py               |  221
-rw-r--r--  yt_dlp/extractor/njpwworld.py            |   10
-rw-r--r--  yt_dlp/extractor/noco.py                 |    9
-rw-r--r--  yt_dlp/extractor/openload.py             |    2
-rw-r--r--  yt_dlp/extractor/packtpub.py             |    5
-rw-r--r--  yt_dlp/extractor/panopto.py              |  190
-rw-r--r--  yt_dlp/extractor/paramountplus.py        |   29
-rw-r--r--  yt_dlp/extractor/patreon.py              |    8
-rw-r--r--  yt_dlp/extractor/piapro.py               |   10
-rw-r--r--  yt_dlp/extractor/platzi.py               |    9
-rw-r--r--  yt_dlp/extractor/playplustv.py           |   12
-rw-r--r--  yt_dlp/extractor/pluralsight.py          |    9
-rw-r--r--  yt_dlp/extractor/pokergo.py              |   10
-rw-r--r--  yt_dlp/extractor/roosterteeth.py         |    8
-rw-r--r--  yt_dlp/extractor/rumble.py               |   17
-rw-r--r--  yt_dlp/extractor/rutv.py                 |    6
-rw-r--r--  yt_dlp/extractor/safari.py               |    9
-rw-r--r--  yt_dlp/extractor/scte.py                 |    9
-rw-r--r--  yt_dlp/extractor/shahid.py               |    8
-rw-r--r--  yt_dlp/extractor/sonyliv.py              |   16
-rw-r--r--  yt_dlp/extractor/soundcloud.py           |   34
-rw-r--r--  yt_dlp/extractor/teachable.py            |    3
-rw-r--r--  yt_dlp/extractor/teamtreehouse.py        |    7
-rw-r--r--  yt_dlp/extractor/tennistv.py             |    9
-rw-r--r--  yt_dlp/extractor/toutv.py                |    7
-rw-r--r--  yt_dlp/extractor/tubitv.py               |    8
-rw-r--r--  yt_dlp/extractor/tumblr.py               |   12
-rw-r--r--  yt_dlp/extractor/twitch.py               |    9
-rw-r--r--  yt_dlp/extractor/udemy.py                |    9
-rw-r--r--  yt_dlp/extractor/veo.py                  |   47
-rw-r--r--  yt_dlp/extractor/vidio.py                |    9
-rw-r--r--  yt_dlp/extractor/viewlift.py             |    3
-rw-r--r--  yt_dlp/extractor/viki.py                 |    9
-rw-r--r--  yt_dlp/extractor/vimeo.py                |   20
-rw-r--r--  yt_dlp/extractor/vk.py                   |    9
-rw-r--r--  yt_dlp/extractor/vlive.py                |   16
-rw-r--r--  yt_dlp/extractor/vrv.py                  |   12
-rw-r--r--  yt_dlp/extractor/youtube.py              |   50
-rw-r--r--  yt_dlp/extractor/zattoo.py               |   14
-rw-r--r--  yt_dlp/extractor/zee5.py                 |   45
-rw-r--r--  yt_dlp/utils.py                          |    5
99 files changed, 1266 insertions(+), 1414 deletions(-)
diff --git a/docs/.gitignore b/docs/.gitignore
deleted file mode 100644
index 69fa449dd..000000000
--- a/docs/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-_build/
diff --git a/docs/Changelog.md b/docs/Changelog.md
deleted file mode 100644
index 99de25fb1..000000000
--- a/docs/Changelog.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-orphan: true
----
-```{include} ../Changelog.md
-```
diff --git a/docs/Collaborators.md b/docs/Collaborators.md
deleted file mode 100644
index 5f493d814..000000000
--- a/docs/Collaborators.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-orphan: true
----
-```{include} ../Collaborators.md
-```
diff --git a/docs/Contributing.md b/docs/Contributing.md
deleted file mode 100644
index 60fe46909..000000000
--- a/docs/Contributing.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-orphan: true
----
-```{include} ../Contributing.md
-```
diff --git a/docs/LICENSE.md b/docs/LICENSE.md
deleted file mode 100644
index 8521669f8..000000000
--- a/docs/LICENSE.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-orphan: true
----
-# LICENSE
-```{include} ../LICENSE
-```
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 1a8e3cb1c..000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,177 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
- @echo "Please use \`make <target>' where <target> is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/yt-dlp.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/yt-dlp.qhc"
-
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/yt-dlp"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/yt-dlp"
- @echo "# devhelp"
-
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-latexpdfja:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through platex and dvipdfmx..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
-
-xml:
- $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
- @echo
- @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-pseudoxml:
- $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
- @echo
- @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/README.md b/docs/README.md
deleted file mode 100644
index 451bedaec..000000000
--- a/docs/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-```{include} ../README.md
-```
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index c4010bbc7..000000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-#
-# yt-dlp documentation build configuration file
-
-import sys
-import os
-
-# Allows to import yt-dlp
-sys.path.insert(0, os.path.abspath('..'))
-
-# -- General configuration ------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- 'myst_parser',
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The master toctree document.
-master_doc = 'README'
-
-# General information about the project.
-project = u'yt-dlp'
-author = u'yt-dlp'
-copyright = u'UNLICENSE'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-from yt_dlp.version import __version__
-version = __version__
-# The full version, including alpha/beta/rc tags.
-release = version
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'default'
-
-# Disable highlights
-highlight_language = 'none'
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ['_static']
-
-# Enable heading anchors
-myst_heading_anchors = 4
-
-# Suppress heading warnings
-suppress_warnings = [
- 'myst.header',
-]
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index f0694bdc0..000000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-myst-parser
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
deleted file mode 100644
index 55c023415..000000000
--- a/docs/supportedsites.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-orphan: true
----
-```{include} ../supportedsites.md
-```
diff --git a/docs/ytdlp_plugins.md b/docs/ytdlp_plugins.md
deleted file mode 100644
index 483b9c46e..000000000
--- a/docs/ytdlp_plugins.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-orphan: true
----
-# ytdlp_plugins
-
-See [https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins](https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins).
diff --git a/test/test_netrc.py b/test/test_netrc.py
index 36b943591..94a703406 100644
--- a/test/test_netrc.py
+++ b/test/test_netrc.py
@@ -7,18 +7,19 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from yt_dlp.extractor import (
- gen_extractors,
-)
+from yt_dlp.extractor import gen_extractor_classes
+from yt_dlp.extractor.common import InfoExtractor
+
+NO_LOGIN = InfoExtractor._perform_login
class TestNetRc(unittest.TestCase):
def test_netrc_present(self):
- for ie in gen_extractors():
- if not hasattr(ie, '_login'):
+ for ie in gen_extractor_classes():
+ if ie._perform_login is NO_LOGIN:
continue
self.assertTrue(
- hasattr(ie, '_NETRC_MACHINE'),
+ ie._NETRC_MACHINE,
'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
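Note: the rewritten test detects login support by comparing the class attribute against the base implementation. A subclass that overrides _perform_login binds a different function object, so an identity check against InfoExtractor._perform_login is enough. A minimal sketch of the pattern (illustrative names, not from the codebase):

    class Base:
        def hook(self):
            pass

    class Child(Base):
        def hook(self):  # overriding binds a new function object
            return 'overridden'

    NO_HOOK = Base.hook
    print(Child.hook is NO_HOOK)  # False -> Child supplies its own hook
    print(Base.hook is NO_HOOK)   # True  -> no override, skip this class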
diff --git a/test/test_utils.py b/test/test_utils.py
index 6be5bb642..a7f1b0e94 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1780,6 +1780,7 @@ Line 1
self.assertEqual(format_bytes(1024**6), '1.00EiB')
self.assertEqual(format_bytes(1024**7), '1.00ZiB')
self.assertEqual(format_bytes(1024**8), '1.00YiB')
+ self.assertEqual(format_bytes(1024**9), '1024.00YiB')
def test_hide_login_info(self):
self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']),
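Note: the new assertion pins the overflow behaviour of format_bytes: YiB is the largest binary suffix, so values past 1024**8 keep growing numerically instead of rolling over to a nonexistent unit. A minimal sketch of that clamping, assuming the same suffix table as yt_dlp/utils.py (the function name here is illustrative):

    import math

    def format_bytes_sketch(num_bytes):
        # Binary suffixes; YiB (1024**8) is the largest available unit
        suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
        if num_bytes == 0:
            return '0.00B'
        # Clamp the exponent so 1024**9 and beyond still render as ...YiB
        exponent = min(int(math.log(num_bytes, 1024)), len(suffixes) - 1)
        return '%.2f%s' % (num_bytes / 1024 ** exponent, suffixes[exponent])

    assert format_bytes_sketch(1024 ** 8) == '1.00YiB'
    assert format_bytes_sketch(1024 ** 9) == '1024.00YiB'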
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 150764629..a5c7348b2 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1427,7 +1427,7 @@ class YoutubeDL(object):
min_wait, max_wait = self.params.get('wait_for_video')
diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
if diff is None and ie_result.get('live_status') == 'is_upcoming':
- diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
+ diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
self.report_warning('Release time of video is not known')
elif (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info')
@@ -2858,14 +2858,13 @@ class YoutubeDL(object):
# Does nothing under normal operation - for backward compatibility of process_info
self.post_extract(info_dict)
+ self._num_downloads += 1
# info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {}
- self._num_downloads += 1
-
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
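Note: the randrange -> uniform change above matters because the --wait-for-video bounds may be non-integral: random.randrange() rejects non-integer arguments, while random.uniform() accepts floats, and the round(..., 0) keeps the computed wait a whole number of seconds. A quick illustration with hypothetical bounds:

    import random

    min_wait, max_wait = 5.5, 10.0
    # random.randrange(5.5, 10.0) would raise an error for non-integer args
    diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait)
                 else (max_wait or min_wait), 0)
    print(diff)  # e.g. 8.0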
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 95fb2f9e7..6b75dfc62 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -133,19 +133,19 @@ class FragmentFD(FileDownloader):
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
- return False, None
+ return False
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
ctx['fragment_filename_sanitized'] = fragment_filename
- try:
- return True, self._read_fragment(ctx)
- except FileNotFoundError:
- if not info_dict.get('is_live'):
- raise
- return False, None
+ return True
def _read_fragment(self, ctx):
- down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ try:
+ down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ except FileNotFoundError:
+ if ctx.get('live'):
+ return None
+ raise
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
@@ -457,7 +457,7 @@ class FragmentFD(FileDownloader):
def download_fragment(fragment, ctx):
if not interrupt_trigger[0]:
- return False, fragment['frag_index']
+ return
frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None
@@ -467,14 +467,12 @@ class FragmentFD(FileDownloader):
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment
- fatal = is_fatal(fragment.get('index') or (frag_index - 1))
- count, frag_content = 0, None
+ fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
- if not success:
- return False, frag_index
- break
+ if self._download_fragment(ctx, fragment['url'], info_dict, headers):
+ break
+ return
except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
@@ -491,13 +489,9 @@ class FragmentFD(FileDownloader):
break
raise
- if count > fragment_retries:
- if not fatal:
- return False, frag_index
+ if count > fragment_retries and fatal:
ctx['dest_stream'].close()
self.report_error('Giving up after %s fragment retries' % fragment_retries)
- return False, frag_index
- return frag_content, frag_index
def append_fragment(frag_content, frag_index, ctx):
if not frag_content:
@@ -520,23 +514,23 @@ class FragmentFD(FileDownloader):
def _download_fragment(fragment):
ctx_copy = ctx.copy()
- frag_content, frag_index = download_fragment(fragment, ctx_copy)
- return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+ download_fragment(fragment, ctx_copy)
+ return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
- for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
ctx['fragment_filename_sanitized'] = frag_filename
ctx['fragment_index'] = frag_index
- result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
if not result:
return False
else:
for fragment in fragments:
if not interrupt_trigger[0]:
break
- frag_content, frag_index = download_fragment(fragment, ctx)
- result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ download_fragment(fragment, ctx)
+ result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
if not result:
return False
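Note: after this refactor _download_fragment only reports success and stashes the sanitized filename in ctx; the bytes are obtained by a separate _read_fragment(ctx) call, which also absorbs the FileNotFoundError case for live streams. The calling convention, schematically (a simplified wrapper, not the actual FragmentFD code):

    def download_then_read(fd, ctx, fragment, info_dict):
        # Step 1: fetch the fragment to disk; only a success flag comes back
        if not fd._download_fragment(ctx, fragment['url'], info_dict):
            return None
        # Step 2: read it back via the filename recorded in ctx; returns None
        # when a live fragment has already disappeared from disk
        return fd._read_fragment(ctx)

This split is what lets the threaded path pass only (fragment, index, filename) between workers instead of shipping fragment payloads through pool.map.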
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 10ba61024..8e096b76b 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -1,8 +1,7 @@
from __future__ import unicode_literals
-import errno
import os
-import socket
+import ssl
import time
import random
@@ -10,6 +9,7 @@ from .common import FileDownloader
from ..compat import (
compat_str,
compat_urllib_error,
+ compat_http_client
)
from ..utils import (
ContentTooShortError,
@@ -18,11 +18,14 @@ from ..utils import (
parse_http_range,
sanitized_Request,
ThrottledDownload,
+ try_get,
write_xattr,
XAttrMetadataError,
XAttrUnavailableError,
)
+RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
+
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
@@ -53,7 +56,6 @@ class HttpFD(FileDownloader):
ctx.open_mode = 'wb'
ctx.resume_len = 0
- ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
ctx.chunk_size = None
@@ -100,6 +102,8 @@ class HttpFD(FileDownloader):
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
+ elif req_start is not None:
+ range_start = req_start
elif ctx.chunk_size > 0:
range_start = 0
else:
@@ -116,23 +120,21 @@ class HttpFD(FileDownloader):
else:
range_end = None
- if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
- range_end = ctx.data_len - 1
- has_range = range_start is not None
- ctx.has_range = has_range
+ if try_get(None, lambda _: range_start > range_end):
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
+
+ if try_get(None, lambda _: range_end >= ctx.content_len):
+ range_end = ctx.content_len - 1
+
request = sanitized_Request(url, request_data, headers)
+ has_range = range_start is not None
if has_range:
set_range(request, range_start, range_end)
# Establish connection
try:
- try:
- ctx.data = self.ydl.urlopen(request)
- except (compat_urllib_error.URLError, ) as err:
- # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
- reason = getattr(err, 'reason', None)
- if isinstance(reason, socket.timeout):
- raise RetryDownload(err)
- raise err
+ ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
@@ -151,7 +153,8 @@ class HttpFD(FileDownloader):
or content_range_end == range_end
or content_len < range_end)
if accept_content_len:
- ctx.data_len = content_len
+ ctx.content_len = content_len
+ ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
@@ -159,8 +162,7 @@ class HttpFD(FileDownloader):
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
- ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
- return
+ ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
except (compat_urllib_error.HTTPError, ) as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
@@ -202,13 +204,14 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except socket.timeout as err:
+ except compat_urllib_error.URLError as err:
+ if isinstance(err.reason, ssl.CertificateError):
+ raise
+ raise RetryDownload(err)
+ # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
+ # Any errors that occur during this will not be wrapped by URLError
+ except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err)
- except socket.error as err:
- if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT):
- # Connection reset is no problem, just retry
- raise RetryDownload(err)
- raise
def download():
nonlocal throttle_start
@@ -254,16 +257,8 @@ class HttpFD(FileDownloader):
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- # socket.timeout is a subclass of socket.error but may not have
- # errno set
- except socket.timeout as e:
- retry(e)
- except socket.error as e:
- # SSLError on python 2 (inherits socket.error) may have
- # no errno set but this error message
- if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
- retry(e)
- raise
+ except RESPONSE_READ_EXCEPTIONS as err:
+ retry(err)
byte_counter += len(data_block)
@@ -343,7 +338,7 @@ class HttpFD(FileDownloader):
elif speed:
throttle_start = None
- if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+ if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
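Note: the new RESPONSE_READ_EXCEPTIONS tuple replaces the old errno-based socket.error filtering. On Python 3, connection resets and timeouts surface as ConnectionError and TimeoutError subclasses, and errors raised while reading the response body are not wrapped in URLError, so one flat except clause covers both call sites. Roughly:

    import ssl
    import http.client

    # Errors that can escape response.read() without being wrapped in URLError
    RESPONSE_READ_EXCEPTIONS = (
        TimeoutError, ConnectionError, ssl.SSLError, http.client.HTTPException)

    def read_block(response, block_size, retry):
        try:
            return response.read(block_size)
        except RESPONSE_READ_EXCEPTIONS as err:
            retry(err)  # hand the error to the downloader's retry machinery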
diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 09516abe5..4d5618c83 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -263,9 +263,11 @@ class IsmFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+ success = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
return False
+ frag_content = self._read_fragment(ctx)
+
if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py
index bc86fd1bf..54e711792 100644
--- a/yt_dlp/downloader/mhtml.py
+++ b/yt_dlp/downloader/mhtml.py
@@ -171,9 +171,10 @@ body > figure > img {
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ success = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
continue
+ frag_content = self._read_fragment(ctx)
mime_type = b'image/jpeg'
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index b28d1ec17..cfca686ee 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -115,9 +115,10 @@ class YoutubeLiveChatFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success, raw_fragment = dl_fragment(url, request_data, headers)
+ success = dl_fragment(url, request_data, headers)
if not success:
return False, None, None, None
+ raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
@@ -145,9 +146,10 @@ class YoutubeLiveChatFD(FragmentFD):
self._prepare_and_start_frag_download(ctx, info_dict)
- success, raw_fragment = dl_fragment(info_dict['url'])
+ success = dl_fragment(info_dict['url'])
if not success:
return False
+ raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 360fa4699..a839f0c1f 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -291,15 +291,7 @@ class AbemaTVIE(AbemaTVBaseIE):
return self._MEDIATOKEN
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- # No authentication to be performed
- if not username:
- return True
-
+ def _perform_login(self, username, password):
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py
index 0863e0d85..fca6e605d 100644
--- a/yt_dlp/extractor/adn.py
+++ b/yt_dlp/extractor/adn.py
@@ -126,10 +126,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}])
return subtitles
- def _real_initialize(self):
- username, password = self._get_login_info()
- if not username:
- return
+ def _perform_login(self, username, password):
try:
access_token = (self._download_json(
self._API_BASE_URL + 'authentication/login', None,
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index f25fc47fa..77f0e3c10 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -32,7 +32,7 @@ class AfreecaTVIE(InfoExtractor):
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
- vod\.afreecatv\.com/PLAYER/STATION/
+ vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)
'''
@@ -170,6 +170,9 @@ class AfreecaTVIE(InfoExtractor):
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
+ }, {
+ 'url': 'http://vod.afreecatv.com/player/15055030',
+ 'only_matching': True,
}]
@staticmethod
@@ -181,14 +184,7 @@ class AfreecaTVIE(InfoExtractor):
video_key['part'] = int(m.group('part'))
return video_key
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_form = {
'szWork': 'login',
'szType': 'json',
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index f5325de2f..d2e2df270 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -74,14 +74,7 @@ class AluraIE(InfoExtractor):
"formats": formats
}
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
- pass
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py
index 4fb7ee424..1c2cc47dd 100644
--- a/yt_dlp/extractor/animelab.py
+++ b/yt_dlp/extractor/animelab.py
@@ -15,25 +15,21 @@ from ..compat import compat_HTTPError
class AnimeLabBaseIE(InfoExtractor):
- _LOGIN_REQUIRED = True
_LOGIN_URL = 'https://www.animelab.com/login'
_NETRC_MACHINE = 'animelab'
+ _LOGGED_IN = False
- def _login(self):
- def is_logged_in(login_webpage):
- return 'Sign In' not in login_webpage
+ def _is_logged_in(self, login_page=None):
+ if not self._LOGGED_IN:
+ if not login_page:
+ login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
+ AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
+ return self._LOGGED_IN
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
-
- # Check if already logged in
- if is_logged_in(login_page):
+ def _perform_login(self, username, password):
+ if self._is_logged_in():
return
- (username, password) = self._get_login_info()
- if username is None and self._LOGIN_REQUIRED:
- self.raise_login_required('Login is required to access any AnimeLab content')
-
login_form = {
'email': username,
'password': password,
@@ -47,17 +43,14 @@ class AnimeLabBaseIE(InfoExtractor):
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
- else:
- raise
+ raise
- # if login was successful
- if is_logged_in(response):
- return
-
- raise ExtractorError('Unable to login (cannot verify if logged in)')
+ if not self._is_logged_in(response):
+ raise ExtractorError('Unable to login (cannot verify if logged in)')
def _real_initialize(self):
- self._login()
+ if not self._is_logged_in():
+ self.raise_login_required('Login is required to access any AnimeLab content')
class AnimeLabIE(AnimeLabBaseIE):
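Note: the login-state cache above is written through the class (AnimeLabBaseIE._LOGGED_IN = ...), not through self. Assigning via self would create an instance attribute that shadows the class one, and other extractor instances would refetch the login page. In miniature (illustrative class, not from the codebase):

    class Cache:
        _FLAG = False  # shared default across instances

        def set_via_class(self):
            Cache._FLAG = True  # visible to every instance

        def set_via_self(self):
            self._FLAG = True   # shadows the class attribute on self only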
diff --git a/yt_dlp/extractor/animeondemand.py b/yt_dlp/extractor/animeondemand.py
index 5694f7240..2e674d58f 100644
--- a/yt_dlp/extractor/animeondemand.py
+++ b/yt_dlp/extractor/animeondemand.py
@@ -53,11 +53,7 @@ class AnimeOnDemandIE(InfoExtractor):
'only_matching': True,
}]
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -93,9 +89,6 @@ class AnimeOnDemandIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
anime_id = self._match_id(url)
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index a7ffdc24c..c2f2c1bd3 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -138,6 +138,7 @@ class ArteTVIE(ArteTVBaseIE):
break
else:
lang_pref = -1
+ format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle'))
media_type = f.get('mediaType')
if media_type == 'hls':
@@ -145,14 +146,17 @@ class ArteTVIE(ArteTVBaseIE):
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for m3u8_format in m3u8_formats:
- m3u8_format['language_preference'] = lang_pref
+ m3u8_format.update({
+ 'language_preference': lang_pref,
+ 'format_note': format_note,
+ })
formats.extend(m3u8_formats)
continue
format = {
'format_id': format_id,
'language_preference': lang_pref,
- 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
+ 'format_note': format_note,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'tbr': int_or_none(f.get('bitrate')),
diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py
index 6d843966a..465af4ed3 100644
--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@@ -37,9 +37,6 @@ class AtresPlayerIE(InfoExtractor):
]
_API_BASE = 'https://api.atresplayer.com/'
- def _real_initialize(self):
- self._login()
-
def _handle_error(self, e, code):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
error = self._parse_json(e.cause.read(), None)
@@ -48,11 +45,7 @@ class AtresPlayerIE(InfoExtractor):
raise ExtractorError(error['error_description'], expected=True)
raise
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py
index fee640e14..b3cabbf94 100644
--- a/yt_dlp/extractor/azmedien.py
+++ b/yt_dlp/extractor/azmedien.py
@@ -15,7 +15,8 @@ class AZMedienIE(InfoExtractor):
(?P<host>
telezueri\.ch|
telebaern\.tv|
- telem1\.ch
+ telem1\.ch|
+ tvo-online\.ch
)/
[^/]+/
(?P<id>
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index b664a7007..823155730 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -264,11 +264,7 @@ class BBCCoUkIE(InfoExtractor):
'only_matching': True,
}]
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading signin page')
@@ -294,9 +290,6 @@ class BBCCoUkIE(InfoExtractor):
'Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
- def _real_initialize(self):
- self._login()
-
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 1bbf7ca1c..b4eb20642 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -821,11 +821,7 @@ class BiliIntlBaseIE(InfoExtractor):
'extractor_key': BiliIntlIE.ie_key(),
}
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
try:
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5
@@ -856,9 +852,6 @@ class BiliIntlBaseIE(InfoExtractor):
else:
raise ExtractorError('Unable to log in')
- def _real_initialize(self):
- self._login()
-
class BiliIntlIE(BiliIntlBaseIE):
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py
index 82fded4e1..31e7d7de6 100644
--- a/yt_dlp/extractor/canvas.py
+++ b/yt_dlp/extractor/canvas.py
@@ -274,14 +274,7 @@ class VrtNUIE(GigyaBaseIE):
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
auth_info = self._gigya_login({
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index ae9ce5862..2af36ea82 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -77,21 +77,21 @@ class CBSIE(CBSBaseIE):
(?:
cbs:|
https?://(?:www\.)?(?:
- cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
+ cbs\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/|
colbertlateshow\.com/(?:video|podcasts)/)
)(?P<id>[\w-]+)'''
# All tests are blocked outside US
_TESTS = [{
- 'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+ 'url': 'https://www.cbs.com/shows/video/xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R/',
'info_dict': {
- 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
+ 'id': 'xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R',
'ext': 'mp4',
- 'title': 'Connect Chat feat. Garth Brooks',
- 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
- 'duration': 1495,
- 'timestamp': 1385585425,
- 'upload_date': '20131127',
+ 'title': 'Tough As Nails - Dreams Never Die',
+ 'description': 'md5:a3535a62531cdd52b0364248a2c1ae33',
+ 'duration': 2588,
+ 'timestamp': 1639015200,
+ 'upload_date': '20211209',
'uploader': 'CBSI-NEW',
},
'params': {
@@ -99,14 +99,14 @@ class CBSIE(CBSBaseIE):
'skip_download': True,
},
}, {
- 'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
+ 'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
'info_dict': {
- 'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
- 'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
- 'timestamp': 1624507140,
- 'description': 'md5:e01af24e95c74d55e8775aef86117b95',
+ 'id': 'sZH1MGgomIosZgxGJ1l263MFq16oMtW1',
+ 'title': 'The Late Show - 3/16/22 (Michael Buble, Rose Matafeo)',
+ 'timestamp': 1647488100,
+ 'description': 'md5:d0e6ec23c544b7fa8e39a8e6844d2439',
'uploader': 'CBSI-NEW',
- 'upload_date': '20210624',
+ 'upload_date': '20220317',
},
'params': {
'ignore_no_formats_error': True,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 345da9a72..f3ae3fd4c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -432,7 +432,15 @@ class InfoExtractor(object):
Subclasses may also override suitable() if necessary, but ensure the function
signature is preserved and that this function imports everything it needs
- (except other extractors), so that lazy_extractors works correctly
+ (except other extractors), so that lazy_extractors works correctly.
+
+ To support username + password (or netrc) login, the extractor must define a
+ _NETRC_MACHINE and re-define _perform_login(username, password) and
+ (optionally) _initialize_pre_login() methods. The _perform_login method will
+ be called between _initialize_pre_login and _real_initialize if credentials
+ are passed by the user. In cases where it is necessary to have the login
+ process as part of the extraction rather than initialization, _perform_login
+ can be left undefined.
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
@@ -460,9 +468,10 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
+ _NETRC_MACHINE = None
_LOGIN_HINTS = {
- 'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials',
+ 'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials',
'cookies': (
'Use --cookies-from-browser or --cookies for the authentication. '
'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
@@ -512,6 +521,10 @@ class InfoExtractor(object):
"""Getter method for _WORKING."""
return cls._WORKING
+ @classmethod
+ def supports_login(cls):
+ return bool(cls._NETRC_MACHINE)
+
def initialize(self):
"""Initializes an instance (authentication, etc)."""
self._printed_messages = set()
@@ -520,6 +533,13 @@ class InfoExtractor(object):
'ip_blocks': self._GEO_IP_BLOCKS,
})
if not self._ready:
+ self._initialize_pre_login()
+ if self.supports_login():
+ username, password = self._get_login_info()
+ if username:
+ self._perform_login(username, password)
+ elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
+ self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}')
self._real_initialize()
self._ready = True
@@ -665,6 +685,14 @@ class InfoExtractor(object):
"""Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
+ def _initialize_pre_login(self):
+ """ Intialization before login. Redefine in subclasses."""
+ pass
+
+ def _perform_login(self, username, password):
+ """ Login with username and password. Redefine in subclasses."""
+ pass
+
def _real_initialize(self):
"""Real initialization process. Redefine in subclasses."""
pass
@@ -1098,12 +1126,15 @@ class InfoExtractor(object):
def raise_login_required(
self, msg='This video is only available for registered users',
- metadata_available=False, method='any'):
+ metadata_available=False, method=NO_DEFAULT):
if metadata_available and (
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
return
+ if method is NO_DEFAULT:
+ method = 'any' if self.supports_login() else 'cookies'
if method is not None:
+ assert method in self._LOGIN_HINTS, 'Invalid login method'
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
raise ExtractorError(msg, expected=True)
@@ -3680,9 +3711,8 @@ class InfoExtractor(object):
def mark_watched(self, *args, **kwargs):
if not self.get_param('mark_watched', False):
return
- if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None
- or self.get_param('cookiefile')
- or self.get_param('cookiesfrombrowser')):
+ if (self.supports_login() and self._get_login_info()[0] is not None
+ or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
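Note: the docstring added above defines the new authentication contract: a site extractor declares _NETRC_MACHINE and implements _perform_login, and initialize() decides whether to call it (after _initialize_pre_login, before _real_initialize, and only when credentials were supplied). A minimal hypothetical extractor under that contract (example.com and all field values are illustrative):

    from yt_dlp.extractor.common import InfoExtractor
    from yt_dlp.utils import urlencode_postdata

    class ExampleSiteIE(InfoExtractor):  # hypothetical, not a real extractor
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>\d+)'
        _NETRC_MACHINE = 'examplesite'   # enables --username/--password/--netrc

        def _perform_login(self, username, password):
            # Runs between _initialize_pre_login() and _real_initialize(),
            # and only when the user actually passed credentials
            self._download_webpage(
                'https://example.com/login', None, 'Logging in',
                data=urlencode_postdata({'user': username, 'pass': password}))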
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index b6ba5ef56..bf1bf8c1c 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -57,10 +57,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded',
})
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
+ def _perform_login(self, username, password):
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
return
@@ -89,9 +86,6 @@ class CrunchyrollBaseIE(InfoExtractor):
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
- def _real_initialize(self):
- self._login()
-
@staticmethod
def _add_skip_wall(url):
parsed_url = compat_urlparse.urlparse(url)
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index 485b6031f..b8abcf7a5 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -33,14 +33,11 @@ class CuriosityStreamBaseIE(InfoExtractor):
self._handle_errors(result)
return result['data']
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
+ def _perform_login(self, username, password):
result = self._download_json(
'https://api.curiositystream.com/v1/login', None,
note='Logging in', data=urlencode_postdata({
- 'email': email,
+ 'email': username,
'password': password,
}))
self._handle_errors(result)
diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py
index 03672b35d..6037fd9ca 100644
--- a/yt_dlp/extractor/daftsex.py
+++ b/yt_dlp/extractor/daftsex.py
@@ -4,30 +4,50 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
- get_elements_by_class,
int_or_none,
js_to_json,
parse_count,
parse_duration,
+ traverse_obj,
try_get,
+ unified_timestamp,
)
class DaftsexIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
+ 'url': 'https://daftsex.com/watch/-35370899_456246186',
+ 'md5': 'd95135e6cea2d905bea20dbe82cda64a',
+ 'info_dict': {
+ 'id': '-35370899_456246186',
+ 'ext': 'mp4',
+ 'title': 'just relaxing',
+ 'description': 'just relaxing - Watch video Watch video in high quality',
+ 'upload_date': '20201113',
+ 'timestamp': 1605261911,
+ 'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb',
+ },
+ }, {
'url': 'https://daftsex.com/watch/-156601359_456242791',
'info_dict': {
'id': '-156601359_456242791',
'ext': 'mp4',
'title': 'Skye Blue - Dinner And A Show',
+ 'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality',
+ 'upload_date': '20200916',
+ 'timestamp': 1600250735,
+ 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = get_elements_by_class('heading', webpage)[-1]
+ title = self._html_search_meta('name', webpage, 'title')
+ timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
+ description = self._html_search_meta('description', webpage, 'Description', default=None)
+
duration = parse_duration(self._search_regex(
r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
webpage, 'duration', fatal=False))
@@ -52,28 +72,75 @@ class DaftsexIE(InfoExtractor):
video_id, transform_source=js_to_json)
server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8')
+
+ cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {}
+ if cdn_files:
+ formats = []
+ for format_id, format_data in cdn_files.items():
+ ext, height = format_id.split('_')
+ formats.append({
+ 'format_id': format_id,
+ 'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}',
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')),
+ 'timestamp': timestamp,
+ 'view_count': views,
+ 'age_limit': 18,
+ }
+
+ item = self._download_json(
+ f'{server_domain}/method/video.get/{video_id}', video_id,
+ headers={'Referer': url}, query={
+ 'token': video_params['video']['access_token'],
+ 'videos': video_id,
+ 'ckey': video_params['c_key'],
+ 'credentials': video_params['video']['credentials'],
+ })['response']['items'][0]
+
formats = []
- for format_id, format_data in video_params['video']['cdn_files'].items():
- ext, height = format_id.split('_')
- extra_quality_data = format_data.split('.')[-1]
- url = f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={extra_quality_data}'
- formats.append({
- 'format_id': format_id,
- 'url': url,
- 'height': int_or_none(height),
- 'ext': ext,
- })
+ for f_id, f_url in item.get('files', {}).items():
+ if f_id == 'external':
+ return self.url_result(f_url)
+ ext, height = f_id.split('_')
+ height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height))
+ if height_extra_key:
+ formats.append({
+ 'format_id': f'{height}p',
+ 'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
self._sort_formats(formats)
- thumbnail = try_get(video_params,
- lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8'))
+ thumbnails = []
+ for k, v in item.items():
+ if k.startswith('photo_') and v:
+ width = k.replace('photo_', '')
+ thumbnails.append({
+ 'id': width,
+ 'url': v,
+ 'width': int_or_none(width),
+ })
return {
'id': video_id,
'title': title,
'formats': formats,
+ 'comment_count': int_or_none(item.get('comments')),
+ 'description': description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
'view_count': views,
'age_limit': 18,
}
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index 9b302a9a0..8398ae30e 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -45,10 +45,7 @@ class DigitalConcertHallIE(InfoExtractor):
'playlist_count': 3,
}]
- def _login(self):
- username, password = self._get_login_info()
- if not username:
- self.raise_login_required()
+ def _perform_login(self, username, password):
token_response = self._download_json(
self._OAUTH_URL,
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
@@ -78,7 +75,8 @@ class DigitalConcertHallIE(InfoExtractor):
self.raise_login_required(msg='Login info incorrect')
def _real_initialize(self):
- self._login()
+ if not self._ACCESS_TOKEN:
+ self.raise_login_required(method='password')
def _entries(self, items, language, **kwargs):
for item in items:
diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py
index a8396f1d3..5d5e7f244 100644
--- a/yt_dlp/extractor/eroprofile.py
+++ b/yt_dlp/extractor/eroprofile.py
@@ -39,11 +39,7 @@ class EroProfileIE(InfoExtractor):
'skip': 'Requires login',
}]
- def _login(self):
- (username, password) = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
query = compat_urllib_parse_urlencode({
'username': username,
'password': password,
@@ -62,9 +58,6 @@ class EroProfileIE(InfoExtractor):
r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
self._download_webpage(redirect_url, None, False)
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
display_id = self._match_id(url)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 09b795c56..4eda27cdc 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -622,6 +622,7 @@ from .hse import (
HSEProductIE,
)
from .huajiao import HuajiaoIE
+from .huya import HuyaLiveIE
from .huffpost import HuffPostIE
from .hungama import (
HungamaIE,
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index ef57b221c..2deed585f 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -329,11 +329,7 @@ class FacebookIE(InfoExtractor):
urls.append(mobj.group('url'))
return urls
- def _login(self):
- useremail, password = self._get_login_info()
- if useremail is None:
- return
-
+ def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None,
@@ -345,7 +341,7 @@ class FacebookIE(InfoExtractor):
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
login_form = {
- 'email': useremail,
+ 'email': username,
'pass': password,
'lsd': lsd,
'lgnrnd': lgnrnd,
@@ -392,9 +388,6 @@ class FacebookIE(InfoExtractor):
self.report_warning('unable to log in: %s' % error_to_compat_str(err))
return
- def _real_initialize(self):
- self._login()
-
def _extract_from_url(self, url, video_id):
webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index 978df31ff..7ea16c61d 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -49,30 +49,26 @@ class FancodeVodIE(InfoExtractor):
'referer': 'https://fancode.com',
}
- def _login(self):
+ def _perform_login(self, username, password):
# Access tokens are shortlived, so get them using the refresh token.
- username, password = self._get_login_info()
- if username == 'refresh' and password is not None:
- self.report_login()
- data = '''{
- "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
- "variables":{
- "refreshToken":"%s"
- },
- "operationName":"RefreshToken"
- }''' % password
-
- token_json = self.download_gql('refresh token', data, "Getting the Access token")
- self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
- if self._ACCESS_TOKEN is None:
- self.report_warning('Failed to get Access token')
- else:
- self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
- elif username is not None:
+ if username != 'refresh':
self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
- def _real_initialize(self):
- self._login()
+ self.report_login()
+ data = '''{
+ "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
+ "variables":{
+ "refreshToken":"%s"
+ },
+ "operationName":"RefreshToken"
+ }''' % password
+
+ token_json = self.download_gql('refresh token', data, "Getting the Access token")
+ self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
+ if self._ACCESS_TOKEN is None:
+ self.report_warning('Failed to get Access token')
+ else:
+ self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
def _check_login_required(self, is_available, is_premium):
msg = None
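The login body above interpolates the refresh token into a raw GraphQL string (`\u0021` is an escaped `!`). For comparison, an escaping-safe way to build the same payload with the stdlib; this is a hedged alternative sketch, not the extractor's actual code:

```python
import json

def refresh_token_payload(refresh_token):
    # Same mutation as in the extractor, but built as a dict so the token
    # is JSON-escaped automatically instead of %-interpolated into a string.
    return json.dumps({
        'query': 'mutation RefreshToken($refreshToken: String!) '
                 '{ refreshToken(refreshToken: $refreshToken) { accessToken }}',
        'variables': {'refreshToken': refresh_token},
        'operationName': 'RefreshToken',
    })

print(refresh_token_payload('example-refresh-token'))  # made-up token
```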
diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py
index 14f4cb489..9dc28d801 100644
--- a/yt_dlp/extractor/franceculture.py
+++ b/yt_dlp/extractor/franceculture.py
@@ -1,18 +1,45 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
int_or_none,
+ traverse_obj,
+ unified_strdate,
)
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
+ # playlist
+ 'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
+ 'playlist_count': 12,
+ 'info_dict': {
+ 'id': 'hasta-dente',
+ 'title': 'Hasta Dente',
+ 'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20201024',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
+ 'ext': 'mp3',
+ 'title': 'Jeudi, vous avez dit bizarre ?',
+ 'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
+ 'duration': 604,
+ 'upload_date': '20201024',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1603576680
+ },
+ },
+ ],
+ }, {
+ 'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
'info_dict': {
'id': 'rendez-vous-au-pays-des-geeks',
'display_id': 'rendez-vous-au-pays-des-geeks',
@@ -20,9 +47,9 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
- 'timestamp': 1393700400,
'vcodec': 'none',
- }
+ 'duration': 3569,
+ },
}, {
# no thumbnail
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
@@ -31,9 +58,54 @@ class FranceCultureIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
-
webpage = self._download_webpage(url, display_id)
+ info = {
+ 'id': display_id,
+ 'title': self._html_search_regex(
+ r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=self._og_search_title(webpage)),
+ 'description': self._html_search_regex(
+ r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
+ 'upload_date': unified_strdate(self._html_search_regex(
+ r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
+ }
+
+ playlist_data = self._search_regex(
+ r'''(?sx)
+ <section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
+ (.*?)
+ </section>
+ ''',
+ webpage, 'playlist data', fatal=False, default=None)
+
+ if playlist_data:
+ entries = []
+ for item, item_description in re.findall(
+ r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
+ playlist_data):
+
+ item_attributes = extract_attributes(item)
+ entries.append({
+ 'id': item_attributes.get('data-emission-uuid'),
+ 'url': item_attributes.get('data-url'),
+ 'title': item_attributes.get('data-diffusion-title'),
+ 'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds')),
+ 'description': item_description,
+ 'timestamp': int_or_none(item_attributes.get('data-start-time')),
+ 'thumbnail': info['thumbnail'],
+ 'uploader': info['uploader'],
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ **info
+ }
+
video_data = extract_attributes(self._search_regex(
r'''(?sx)
(?:
@@ -43,31 +115,14 @@ class FranceCultureIE(InfoExtractor):
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
''',
webpage, 'video data'))
-
- video_url = video_data.get('data-url') or video_data['data-asset-source']
- title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
- webpage, 'description', default=None)
- thumbnail = self._search_regex(
- r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
- webpage, 'thumbnail', default=None)
- uploader = self._html_search_regex(
- r'(?s)<span class="author">(.*?)</span>',
- webpage, 'uploader', default=None)
+ video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
ext = determine_ext(video_url.lower())
return {
- 'id': display_id,
'display_id': display_id,
'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
- 'uploader': uploader,
- 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
+ **info
}
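The new playlist branch reads all per-episode metadata from `data-*` attributes on each replay `<button>` via `extract_attributes`. A standalone sketch of that step, with `html.parser` standing in for the helper; the attribute values are taken from the test case above except `data-url`, which is hypothetical:

```python
from html.parser import HTMLParser

SAMPLE = '''<button class="replay-button playable"
    data-emission-uuid="3c1c2e55-41a0-11e5-9fe0-005056a87c89"
    data-url="https://media.example/episode.mp3"
    data-diffusion-title="Jeudi, vous avez dit bizarre ?"
    data-duration-seconds="604" data-start-time="1603576680"></button>'''

class ButtonAttrs(HTMLParser):
    def __init__(self):
        super().__init__()
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        if tag == 'button':
            self.attrs = dict(attrs)  # attrs arrives as (name, value) pairs

parser = ButtonAttrs()
parser.feed(SAMPLE)
print({
    'id': parser.attrs.get('data-emission-uuid'),
    'url': parser.attrs.get('data-url'),
    'title': parser.attrs.get('data-diffusion-title'),
    'duration': int(parser.attrs['data-duration-seconds']),
    'timestamp': int(parser.attrs['data-start-time']),
})
```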
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index 0d29da29b..fc67a8437 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -28,14 +28,7 @@ class FrontendMastersBaseIE(InfoExtractor):
'high': {'width': 1920, 'height': 1080}
}
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- (username, password) = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index 96dad2ca3..36a9c4772 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -36,9 +36,8 @@ class FunimationBaseIE(InfoExtractor):
note='Checking geo-location', errnote='Unable to fetch geo-location information'),
'region') or 'US'
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
+ def _perform_login(self, username, password):
+ if self._TOKEN:
return
try:
data = self._download_json(
@@ -47,7 +46,7 @@ class FunimationBaseIE(InfoExtractor):
'username': username,
'password': password,
}))
- return data['token']
+ FunimationBaseIE._TOKEN = data['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)['error']
@@ -90,8 +89,6 @@ class FunimationPageIE(FunimationBaseIE):
def _real_initialize(self):
if not self._REGION:
FunimationBaseIE._REGION = self._get_region()
- if not self._TOKEN:
- FunimationBaseIE._TOKEN = self._login()
def _real_extract(self, url):
locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')
@@ -154,10 +151,6 @@ class FunimationIE(FunimationBaseIE):
},
}]
- def _real_initialize(self):
- if not self._TOKEN:
- FunimationBaseIE._TOKEN = self._login()
-
@staticmethod
def _get_experiences(episode):
for lang, lang_data in episode.get('languages', {}).items():
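Note the assignment to `FunimationBaseIE._TOKEN` rather than `self._TOKEN`: the token is cached on the base class, so one successful login is shared by `FunimationPageIE` and `FunimationIE` alike, and the `if self._TOKEN: return` guard turns repeat calls into no-ops. A toy demonstration of that pattern:

```python
class FunimationLikeBase:
    _TOKEN = None

    def perform_login(self, username, password):
        if self._TOKEN:  # some sibling instance already logged in
            return
        # Stand-in for the real login POST; only the caching is the point.
        FunimationLikeBase._TOKEN = f'token-for-{username}'

class PageIE(FunimationLikeBase):
    pass

class EpisodeIE(FunimationLikeBase):
    pass

PageIE().perform_login('alice', 'hunter2')
print(EpisodeIE()._TOKEN)  # 'token-for-alice': shared, no second login
```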
diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py
index 7821fb783..5b0195c63 100644
--- a/yt_dlp/extractor/gaia.py
+++ b/yt_dlp/extractor/gaia.py
@@ -56,24 +56,22 @@ class GaiaIE(InfoExtractor):
def _real_initialize(self):
auth = self._get_cookies('https://www.gaia.com/').get('auth')
if auth:
- auth = self._parse_json(
- compat_urllib_parse_unquote(auth.value),
- None, fatal=False)
- if not auth:
- username, password = self._get_login_info()
- if username is None:
- return
- auth = self._download_json(
- 'https://auth.gaia.com/v1/login',
- None, data=urlencode_postdata({
- 'username': username,
- 'password': password
- }))
- if auth.get('success') is False:
- raise ExtractorError(', '.join(auth['messages']), expected=True)
- if auth:
+ auth = self._parse_json(compat_urllib_parse_unquote(auth.value), None, fatal=False)
self._jwt = auth.get('jwt')
+ def _perform_login(self, username, password):
+ if self._jwt:
+ return
+ auth = self._download_json(
+ 'https://auth.gaia.com/v1/login',
+ None, data=urlencode_postdata({
+ 'username': username,
+ 'password': password
+ }))
+ if auth.get('success') is False:
+ raise ExtractorError(', '.join(auth['messages']), expected=True)
+ self._jwt = auth.get('jwt')
+
def _real_extract(self, url):
display_id, vtype = self._match_valid_url(url).groups()
node_id = self._download_json(
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 6a8b8543b..97e34808f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -153,6 +153,7 @@ class GenericIE(InfoExtractor):
IE_DESC = 'Generic downloader that works on some sites'
_VALID_URL = r'.*'
IE_NAME = 'generic'
+ _NETRC_MACHINE = False # Suppress username warning
_TESTS = [
# Direct link to a video
{
diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py
index 15bd444f9..46d7d62ab 100644
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -35,18 +35,14 @@ class HiDiveIE(InfoExtractor):
'skip': 'Requires Authentication',
}]
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
-
+ def _perform_login(self, username, password):
webpage = self._download_webpage(self._LOGIN_URL, None)
form = self._search_regex(
r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
webpage, 'login form')
data = self._hidden_inputs(form)
data.update({
- 'Email': email,
+ 'Email': username,
'Password': password,
})
self._download_webpage(
diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py
index dc5b9670c..36d600773 100644
--- a/yt_dlp/extractor/hrti.py
+++ b/yt_dlp/extractor/hrti.py
@@ -27,8 +27,9 @@ class HRTiBaseIE(InfoExtractor):
_APP_VERSION = '1.1'
_APP_PUBLICATION_ID = 'all_in_one'
_API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
+ _token = None
- def _initialize_api(self):
+ def _initialize_pre_login(self):
init_data = {
'application_publication_id': self._APP_PUBLICATION_ID
}
@@ -64,12 +65,7 @@ class HRTiBaseIE(InfoExtractor):
self._logout_url = modules['user']['resources']['logout']['uri']
- def _login(self):
- username, password = self._get_login_info()
- # TODO: figure out authentication with cookies
- if username is None or password is None:
- self.raise_login_required()
-
+ def _perform_login(self, username, password):
auth_data = {
'username': username,
'password': password,
@@ -94,8 +90,9 @@ class HRTiBaseIE(InfoExtractor):
self._token = auth_info['secure_streaming_token']
def _real_initialize(self):
- self._initialize_api()
- self._login()
+ if not self._token:
+ # TODO: figure out authentication with cookies
+ self.raise_login_required(method='password')
class HRTiIE(HRTiBaseIE):
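The HRTi change also shows the other new hook, `_initialize_pre_login`, which performs API bootstrap before any login attempt; `_real_initialize` is then reduced to checking that a token exists. A sketch of the resulting call order, with illustrative bodies (the token check mirrors the hunk above):

```python
class Extractor:
    _token = None

    def initialize(self, username=None, password=None):
        self._initialize_pre_login()                  # 1. always runs first
        if username is not None:
            self._perform_login(username, password)   # 2. only with credentials
        self._real_initialize()                       # 3. runs last

    def _initialize_pre_login(self):
        self.api_base = 'https://api.example/'        # hypothetical bootstrap

    def _perform_login(self, username, password):
        self._token = f'{username}:session-token'     # stand-in for auth POST

    def _real_initialize(self):
        if not self._token:
            raise RuntimeError('login required')      # raise_login_required()

ie = Extractor()
ie.initialize('alice', 'hunter2')
print(ie._token)
```

The imggaming hunk below uses the same three-step shape.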
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
new file mode 100644
index 000000000..b81439682
--- /dev/null
+++ b/yt_dlp/extractor/huya.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+
+from ..compat import compat_urlparse, compat_b64decode
+
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+)
+
+from .common import InfoExtractor
+
+
+class HuyaLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
+ IE_NAME = 'huya:live'
+ IE_DESC = 'huya.com'
+ _TESTS = [{
+ 'url': 'https://www.huya.com/572329',
+ 'info_dict': {
+ 'id': '572329',
+ 'title': str,
+ 'description': str,
+ 'is_live': True,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.huya.com/xiaoyugame',
+ 'only_matching': True
+ }]
+
+ _RESOLUTION = {
+ '蓝光4M': {
+ 'width': 1920,
+ 'height': 1080,
+ },
+ '超清': {
+ 'width': 1280,
+ 'height': 720,
+ },
+ '流畅': {
+ 'width': 800,
+ 'height': 480
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id=video_id)
+ json_stream = self._search_regex(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage, 'stream', default=None)
+ if not json_stream:
+ raise ExtractorError('Video is offline', expected=True)
+ stream_data = self._parse_json(compat_b64decode(json_stream).decode(), video_id=video_id,
+ transform_source=js_to_json)
+ room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
+ if not room_info:
+ raise ExtractorError('Can not extract the room info', expected=True)
+ title = room_info.get('roomName') or room_info.get('introduction') or self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ screen_type = room_info.get('screenType')
+ live_source_type = room_info.get('liveSourceType')
+ stream_info_list = stream_data['data'][0]['gameStreamInfoList']
+ formats = []
+ for stream_info in stream_info_list:
+ stream_url = stream_info.get('sFlvUrl')
+ if not stream_url:
+ continue
+ stream_name = stream_info.get('sStreamName')
+ re_secret = not screen_type and live_source_type in (0, 8, 13)
+ params = dict(compat_urlparse.parse_qsl(unescapeHTML(stream_info['sFlvAntiCode'])))
+ fm, ss = '', ''
+ if re_secret:
+ fm, ss = self.encrypt(params, stream_info, stream_name)
+ for si in stream_data.get('vMultiStreamInfo') or []:
+ rate = si.get('iBitRate')
+ if rate:
+ params['ratio'] = rate
+ else:
+ params.pop('ratio', None)
+ if re_secret:
+ params['wsSecret'] = hashlib.md5(
+ '_'.join([fm, params['u'], stream_name, ss, params['wsTime']]).encode()).hexdigest()
+ formats.append({
+ 'ext': stream_info.get('sFlvUrlSuffix'),
+ 'format_id': str_or_none(stream_info.get('iLineIndex')),
+ 'tbr': rate,
+ 'url': update_url_query(f'{stream_url}/{stream_name}.{stream_info.get("sFlvUrlSuffix")}',
+ query=params),
+ **self._RESOLUTION.get(si.get('sDisplayName'), {}),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'view_count': room_info.get('totalCount'),
+ 'thumbnail': room_info.get('screenshot'),
+ 'description': room_info.get('contentIntro'),
+ 'http_headers': {
+ 'Origin': 'https://www.huya.com',
+ 'Referer': 'https://www.huya.com/',
+ },
+ }
+
+ def encrypt(self, params, stream_info, stream_name):
+ ct = int(params.get('wsTime', '0'), 16) + random.random()
+ presenter_uid = stream_info['lPresenterUid']
+ if not stream_name.startswith(str(presenter_uid)):
+ uid = presenter_uid
+ else:
+ uid = int_or_none(ct % 1e7 * 1e6 % 0xffffffff)
+ u1 = uid & 0xffffffff00000000
+ u2 = uid & 0xffffffff
+ u3 = uid & 0xffffff
+ u = u1 | u2 >> 24 | u3 << 8
+ params.update({
+ 'u': str_or_none(u),
+ 'seqid': str_or_none(int_or_none(ct * 1000) + uid),
+ 'ver': '1',
+ 'uuid': int_or_none(ct % 1e7 * 1e6 % 0xffffffff),
+ 't': '100',
+ })
+ fm = compat_b64decode(params['fm']).decode().split('_', 1)[0]
+ ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]).encode()).hexdigest()
+ return fm, ss
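For streams that need the anti-code, `encrypt` derives `wsSecret` from the decoded `fm` prefix, the computed `u`, the stream name, the inner digest `ss` and `wsTime`. A standalone worked example of that signing chain with made-up input values; it assumes the `.encode()).hexdigest()` form used above, since `hashlib.md5` only accepts bytes:

```python
import base64
import hashlib

def ws_secret(fm_b64, u, stream_name, seqid, ctype, t, ws_time):
    # 'fm' arrives base64-encoded in the anti-code query; only the part
    # before the first '_' is used as the signing prefix.
    fm = base64.b64decode(fm_b64).decode().split('_', 1)[0]
    ss = hashlib.md5('|'.join([seqid, ctype, t]).encode()).hexdigest()
    return hashlib.md5(
        '_'.join([fm, u, stream_name, ss, ws_time]).encode()).hexdigest()

print(ws_secret(
    fm_b64=base64.b64encode(b'secretprefix_tail').decode(),  # fake 'fm'
    u='1461531600', stream_name='1461531600-stream',
    seqid='1647900000001461531600', ctype='huya_live', t='100',
    ws_time='62391234'))
```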
diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py
index 230dc86d3..ce7b21ab2 100644
--- a/yt_dlp/extractor/imggaming.py
+++ b/yt_dlp/extractor/imggaming.py
@@ -21,25 +21,26 @@ class ImgGamingBaseIE(InfoExtractor):
_REALM = None
_VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
- def _real_initialize(self):
+ def _initialize_pre_login(self):
self._HEADERS = {
'Realm': 'dce.' + self._REALM,
'x-api-key': self._API_KEY,
}
- email, password = self._get_login_info()
- if email is None:
- self.raise_login_required()
-
+ def _perform_login(self, username, password):
p_headers = self._HEADERS.copy()
p_headers['Content-Type'] = 'application/json'
self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
self._API_BASE + 'login',
None, 'Logging in', data=json.dumps({
- 'id': email,
+ 'id': username,
'secret': password,
}).encode(), headers=p_headers)['authorisationToken']
+ def _real_initialize(self):
+ if not self._HEADERS.get('Authorization'):
+ self.raise_login_required(method='password')
+
def _call_api(self, path, media_id):
return self._download_json(
self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 3bb786d6a..970f2c8ab 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -29,9 +29,8 @@ class InstagramBaseIE(InfoExtractor):
_NETRC_MACHINE = 'instagram'
_IS_LOGGED_IN = False
- def _login(self):
- username, password = self._get_login_info()
- if username is None or self._IS_LOGGED_IN:
+ def _perform_login(self, username, password):
+ if self._IS_LOGGED_IN:
return
login_webpage = self._download_webpage(
@@ -72,9 +71,6 @@ class InstagramBaseIE(InfoExtractor):
raise ExtractorError('Unable to login')
InstagramBaseIE._IS_LOGGED_IN = True
- def _real_initialize(self):
- self._login()
-
def _get_count(self, media, kind, *keys):
return traverse_obj(
media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
index 347fec1d5..1a2038453 100644
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -65,11 +65,9 @@ class IPrimaIE(InfoExtractor):
'only_matching': True,
}]
- def _login(self):
- username, password = self._get_login_info()
-
- if username is None or password is None:
- self.raise_login_required('Login is required to access any iPrima content', method='password')
+ def _perform_login(self, username, password):
+ if self.access_token:
+ return
login_page = self._download_webpage(
self._LOGIN_URL, None, note='Downloading login page',
@@ -105,16 +103,16 @@ class IPrimaIE(InfoExtractor):
if self.access_token is None:
raise ExtractorError('Getting token failed', expected=True)
+ def _real_initialize(self):
+ if not self.access_token:
+ self.raise_login_required('Login is required to access any iPrima content', method='password')
+
def _raise_access_error(self, error_code):
if error_code == 'PLAY_GEOIP_DENIED':
self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
elif error_code is not None:
self.raise_no_formats('Access to stream infos forbidden', expected=True)
- def _real_initialize(self):
- if not self.access_token:
- self._login()
-
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index 9d2228700..0ee1eeb4d 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -22,14 +22,7 @@ class LecturioBaseIE(InfoExtractor):
_LOGIN_URL = 'https://app.lecturio.com/en/login'
_NETRC_MACHINE = 'lecturio'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
# Sets some cookies
_, urlh = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login popup')
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index bd76ae166..bf549e164 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -25,12 +25,9 @@ class LinkedInBaseIE(InfoExtractor):
_NETRC_MACHINE = 'linkedin'
_logged_in = False
- def _real_initialize(self):
+ def _perform_login(self, username, password):
if self._logged_in:
return
- email, password = self._get_login_info()
- if email is None:
- return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -39,7 +36,7 @@ class LinkedInBaseIE(InfoExtractor):
default='https://www.linkedin.com/uas/login-submit', group='url'))
data = self._hidden_inputs(login_page)
data.update({
- 'session_key': email,
+ 'session_key': username,
'session_password': password,
})
login_submit_page = self._download_webpage(
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
index 2053970d1..6aff88e13 100644
--- a/yt_dlp/extractor/linuxacademy.py
+++ b/yt_dlp/extractor/linuxacademy.py
@@ -75,14 +75,7 @@ class LinuxAcademyIE(InfoExtractor):
_CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
_NETRC_MACHINE = 'linuxacademy'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
def random_string():
return ''.join([
random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py
index 58cf17239..ce304743f 100644
--- a/yt_dlp/extractor/lynda.py
+++ b/yt_dlp/extractor/lynda.py
@@ -21,9 +21,6 @@ class LyndaBaseIE(InfoExtractor):
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_NETRC_MACHINE = 'lynda'
- def _real_initialize(self):
- self._login()
-
@staticmethod
def _check_error(json_string, key_or_keys):
keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
@@ -32,7 +29,7 @@ class LyndaBaseIE(InfoExtractor):
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
- def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
+ def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
action_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
'post url', default=fallback_action_url, group='url')
@@ -55,11 +52,7 @@ class LyndaBaseIE(InfoExtractor):
return response, action_url
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
# Step 1: download signin page
signin_page = self._download_webpage(
self._SIGNIN_URL, None, 'Downloading signin page')
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index d235805c3..b77ef5f28 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -148,14 +148,12 @@ class NebulaBaseIE(InfoExtractor):
'creator': episode['channel_title'],
}
- def _login(self):
+ def _perform_login(self, username=None, password=None):
+ # FIXME: username should be passed from here to inner functions
self._nebula_api_token = self._retrieve_nebula_api_token()
self._nebula_bearer_token = self._fetch_nebula_bearer_token()
self._zype_access_token = self._fetch_zype_access_token()
- def _real_initialize(self):
- self._login()
-
class NebulaIE(NebulaBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 8f56fc95b..74828f833 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -2,36 +2,36 @@
from __future__ import unicode_literals
import datetime
-import itertools
import functools
+import itertools
import json
import re
+import time
from .common import InfoExtractor, SearchInfoExtractor
-from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import (
- compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
compat_HTTPError,
)
from ..utils import (
ExtractorError,
- dict_get,
+ OnDemandPagedList,
+ bug_reports_message,
+ clean_html,
float_or_none,
int_or_none,
- OnDemandPagedList,
+ join_nonempty,
parse_duration,
+ parse_filesize,
parse_iso8601,
- PostProcessingError,
remove_start,
- str_or_none,
traverse_obj,
try_get,
unescapeHTML,
- unified_timestamp,
+ update_url_query,
+ url_or_none,
urlencode_postdata,
- xpath_text,
)
@@ -41,7 +41,7 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
- 'md5': 'a5bad06f1347452102953f323c69da34s',
+ 'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
'id': 'sm22312215',
'ext': 'mp4',
@@ -164,35 +164,42 @@ class NiconicoIE(InfoExtractor):
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
+ }, {
+ 'note': 'a video that is only served as encrypted HLS.',
+ 'url': 'https://www.nicovideo.jp/watch/so38016254',
+ 'only_matching': True,
}]
- _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
+ _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
-
+ _COMMENT_API_ENDPOINTS = (
+ 'https://nvcomment.nicovideo.jp/legacy/api.json',
+ 'https://nmsg.nicovideo.jp/api.json',)
_API_HEADERS = {
'X-Frontend-ID': '6',
- 'X-Frontend-Version': '0'
+ 'X-Frontend-Version': '0',
+ 'X-Niconico-Language': 'en-us',
+ 'Referer': 'https://www.nicovideo.jp/',
+ 'Origin': 'https://www.nicovideo.jp',
}
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- # No authentication to be performed
- if not username:
- return True
-
- # Log in
+ def _perform_login(self, username, password):
login_ok = True
login_form_strs = {
'mail_tel': username,
'password': password,
}
+ self._request_webpage(
+ 'https://account.nicovideo.jp/login', None,
+ note='Acquiring Login session')
urlh = self._request_webpage(
- 'https://account.nicovideo.jp/api/v1/login', None,
+ 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
note='Logging in', errnote='Unable to log in',
- data=urlencode_postdata(login_form_strs))
+ data=urlencode_postdata(login_form_strs),
+ headers={
+ 'Referer': 'https://account.nicovideo.jp/login',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
if urlh is False:
login_ok = False
else:
@@ -204,8 +211,8 @@ class NiconicoIE(InfoExtractor):
return login_ok
def _get_heartbeat_info(self, info_dict):
-
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
+ dmc_protocol = info_dict['_expected_protocol']
api_data = (
info_dict.get('_api_data')
@@ -220,49 +227,50 @@ class NiconicoIE(InfoExtractor):
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
def ping():
- status = try_get(
- self._download_json(
- 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
- query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
- note='Acquiring permission for downloading video',
- headers=self._API_HEADERS),
- lambda x: x['meta']['status'])
- if status != 200:
- self.report_warning('Failed to acquire permission for playing video. The video may not download.')
+ tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
+ if tracking_id:
+ tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
+ watch_request_response = self._download_json(
+ tracking_url, video_id,
+ note='Acquiring permission for downloading video', fatal=False,
+ headers=self._API_HEADERS)
+ if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
+ self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
yesno = lambda x: 'yes' if x else 'no'
- # m3u8 (encryption)
- if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
+ if dmc_protocol == 'http':
+ protocol = 'http'
+ protocol_parameters = {
+ 'http_output_download_parameters': {
+ 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
+ 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
+ }
+ }
+ elif dmc_protocol == 'hls':
protocol = 'm3u8'
- encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
- session_api_http_parameters = {
- 'parameters': {
- 'hls_parameters': {
- 'encryption': {
- encryption: {
- 'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
- 'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
- }
- },
- 'transfer_preset': '',
- 'use_ssl': yesno(session_api_endpoint['isSsl']),
- 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
- 'segment_duration': 6000,
- }
+ segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
+ parsed_token = self._parse_json(session_api_data['token'], video_id)
+ encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
+ protocol_parameters = {
+ 'hls_parameters': {
+ 'segment_duration': segment_duration,
+ 'transfer_preset': '',
+ 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
+ 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
}
}
- # http
- else:
- protocol = 'http'
- session_api_http_parameters = {
- 'parameters': {
- 'http_output_download_parameters': {
- 'use_ssl': yesno(session_api_endpoint['isSsl']),
- 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+ if 'hls_encryption' in parsed_token and encryption:
+ protocol_parameters['hls_parameters']['encryption'] = {
+ parsed_token['hls_encryption']: {
+ 'encrypted_key': encryption['encryptedKey'],
+ 'key_uri': encryption['keyUri'],
}
}
- }
+ else:
+ protocol = 'm3u8_native'
+ else:
+ raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
session_response = self._download_json(
session_api_endpoint['url'], video_id,
@@ -296,11 +304,13 @@ class NiconicoIE(InfoExtractor):
'lifetime': session_api_data.get('heartbeatLifetime')
}
},
- 'priority': session_api_data.get('priority'),
+ 'priority': session_api_data['priority'],
'protocol': {
'name': 'http',
'parameters': {
- 'http_parameters': session_api_http_parameters
+ 'http_parameters': {
+ 'parameters': protocol_parameters
+ }
}
},
'recipe_id': session_api_data.get('recipeId'),
@@ -328,36 +338,35 @@ class NiconicoIE(InfoExtractor):
return info_dict, heartbeat_info_dict
- def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
- def parse_format_id(id_code):
- mobj = re.match(r'''(?x)
- (?:archive_)?
- (?:(?P<codec>[^_]+)_)?
- (?:(?P<br>[\d]+)kbps_)?
- (?:(?P<res>[\d+]+)p_)?
- ''', '%s_' % id_code)
- return mobj.groupdict() if mobj else {}
-
- protocol = 'niconico_dmc'
- format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
- vdict = parse_format_id(video_quality['id'])
- adict = parse_format_id(audio_quality['id'])
- resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
- vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
+ def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
+
+ if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
+ return None
+
+ def extract_video_quality(video_quality):
+ return parse_filesize('%sB' % self._search_regex(
+ r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default=''))
+
+ format_id = '-'.join(
+ [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
+
+ vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
+ vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))
return {
- 'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
+ 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id,
- 'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
+ 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
- 'vcodec': vdict.get('codec'),
- 'acodec': adict.get('codec'),
- 'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
- 'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
- 'height': int_or_none(resolution.get('height', vdict.get('res'))),
- 'width': int_or_none(resolution.get('width')),
- 'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
- 'protocol': protocol,
+ 'acodec': 'aac',
+ 'vcodec': 'h264',
+ 'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000),
+ 'vbr': float_or_none(vid_quality if vid_quality and vid_quality > 0 else extract_video_quality(vid_qual_label), 1000),
+ 'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')),
+ 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
+ 'quality': -2 if 'low' in video_quality['id'] else None,
+ 'protocol': 'niconico_dmc',
+ '_expected_protocol': dmc_protocol,
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
@@ -367,248 +376,157 @@ class NiconicoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- # Get video webpage for API data.
- webpage, handle = self._download_webpage_handle(
- 'http://www.nicovideo.jp/watch/' + video_id, video_id)
- if video_id.startswith('so'):
- video_id = self._match_id(handle.geturl())
-
- api_data = self._parse_json(self._html_search_regex(
- 'data-api-data="([^"]+)"', webpage,
- 'API data', default='{}'), video_id)
-
- def get_video_info_web(items):
- return dict_get(api_data['video'], items)
-
- # Get video info
- video_info_xml = self._download_xml(
- 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
- video_id, note='Downloading video info page')
-
- def get_video_info_xml(items):
- if not isinstance(items, list):
- items = [items]
- for item in items:
- ret = xpath_text(video_info_xml, './/' + item)
- if ret:
- return ret
-
- if get_video_info_xml('error'):
- error_code = get_video_info_xml('code')
-
- if error_code == 'DELETED':
- raise ExtractorError('The video has been deleted.',
- expected=True)
- elif error_code == 'NOT_FOUND':
- raise ExtractorError('The video is not found.',
- expected=True)
- elif error_code == 'COMMUNITY':
- self.to_screen('%s: The video is community members only.' % video_id)
- else:
- raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
+ try:
+ webpage, handle = self._download_webpage_handle(
+ 'http://www.nicovideo.jp/watch/' + video_id, video_id)
+ if video_id.startswith('so'):
+ video_id = self._match_id(handle.geturl())
+
+ api_data = self._parse_json(self._html_search_regex(
+ 'data-api-data="([^"]+)"', webpage,
+ 'API data', default='{}'), video_id)
+ except ExtractorError as e:
+ try:
+ api_data = self._download_json(
+ 'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
+ note='Downloading API JSON', errnote='Unable to fetch data')['data']
+ except ExtractorError:
+ if not isinstance(e.cause, compat_HTTPError):
+ raise
+ webpage = e.cause.read().decode('utf-8', 'replace')
+ error_msg = self._html_search_regex(
+ r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
+ webpage, 'error reason', default=None)
+ if not error_msg:
+ raise
+ raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)
- # Start extracting video formats
formats = []
- # Get HTML5 videos info
- quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
- if not quality_info:
- raise ExtractorError('The video can\'t be downloaded', expected=True)
-
- for audio_quality in quality_info.get('audios') or {}:
- for video_quality in quality_info.get('videos') or {}:
- if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
- continue
- formats.append(self._extract_format_for_quality(
- api_data, video_id, audio_quality, video_quality))
+ def get_video_info(*items, get_first=True, **kwargs):
+ return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
- # Get flv/swf info
- timestamp = None
- video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
- if video_real_url:
- is_economy = video_real_url.endswith('low')
-
- if is_economy:
- self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
-
- # Invoking ffprobe to determine resolution
- pp = FFmpegPostProcessor(self._downloader)
- cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
-
- self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
-
- try:
- metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
- except PostProcessingError as err:
- raise ExtractorError(err.msg, expected=True)
-
- v_stream = a_stream = {}
-
- # Some complex swf files doesn't have video stream (e.g. nm4809023)
- for stream in metadata['streams']:
- if stream['codec_type'] == 'video':
- v_stream = stream
- elif stream['codec_type'] == 'audio':
- a_stream = stream
-
- # Community restricted videos seem to have issues with the thumb API not returning anything at all
- filesize = int(
- (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
- or metadata['format']['size']
- )
- extension = (
- get_video_info_xml('movie_type')
- or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
- )
-
- # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
- timestamp = (
- parse_iso8601(get_video_info_web('first_retrieve'))
- or unified_timestamp(get_video_info_web('postedDateTime'))
- )
- metadata_timestamp = (
- parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
- or timestamp if extension != 'mp4' else 0
- )
-
- # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
- smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
-
- is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
-
- # If movie file size is unstable, old server movie is not source movie.
- if filesize > 1:
- formats.append({
- 'url': video_real_url,
- 'format_id': 'smile' if not is_economy else 'smile_low',
- 'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
- 'ext': extension,
- 'container': extension,
- 'vcodec': v_stream.get('codec_name'),
- 'acodec': a_stream.get('codec_name'),
- # Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
- 'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
- 'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
- 'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
- 'height': int_or_none(v_stream.get('height')),
- 'width': int_or_none(v_stream.get('width')),
- 'source_preference': 5 if not is_economy else -2,
- 'quality': 5 if is_source and not is_economy else None,
- 'filesize': filesize
- })
+ quality_info = api_data['media']['delivery']['movie']
+ session_api_data = quality_info['session']
+ for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
+ fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
+ if fmt:
+ formats.append(fmt)
self._sort_formats(formats)
# Start extracting information
- title = (
- get_video_info_xml('title') # prefer to get the untranslated original title
- or get_video_info_web(['originalTitle', 'title'])
- or self._og_search_title(webpage, default=None)
- or self._html_search_regex(
- r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
- webpage, 'video title'))
-
- watch_api_data_string = self._html_search_regex(
- r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
- webpage, 'watch api data', default=None)
- watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
- video_detail = watch_api_data.get('videoDetail', {})
-
- thumbnail = (
- self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
- or dict_get( # choose highest from 720p to 240p
- get_video_info_web('thumbnail'),
- ['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
- or self._html_search_meta('image', webpage, 'thumbnail', default=None)
- or video_detail.get('thumbnail'))
-
- description = get_video_info_web('description')
-
- if not timestamp:
- match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
- if match:
- timestamp = parse_iso8601(match.replace('+', ':00+'))
- if not timestamp and video_detail.get('postedAt'):
- timestamp = parse_iso8601(
- video_detail['postedAt'].replace('/', '-'),
- delimiter=' ', timezone=datetime.timedelta(hours=9))
- timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
-
- view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
- if not view_count:
- match = self._html_search_regex(
- r'>Views: <strong[^>]*>([^<]+)</strong>',
- webpage, 'view count', default=None)
- if match:
- view_count = int_or_none(match.replace(',', ''))
- view_count = (
- view_count
- or video_detail.get('viewCount')
- or try_get(api_data, lambda x: x['video']['count']['view']))
-
- comment_count = (
- int_or_none(get_video_info_web('comment_num'))
- or video_detail.get('commentCount')
- or try_get(api_data, lambda x: x['video']['count']['comment']))
-
- if not comment_count:
- match = self._html_search_regex(
- r'>Comments: <strong[^>]*>([^<]+)</strong>',
- webpage, 'comment count', default=None)
- if match:
- comment_count = int_or_none(match.replace(',', ''))
-
- duration = (parse_duration(
- get_video_info_web('length')
- or self._html_search_meta(
- 'video:duration', webpage, 'video duration', default=None))
- or video_detail.get('length')
- or get_video_info_web('duration'))
-
- webpage_url = get_video_info_web('watch_url') or url
-
- # for channel movie and community movie
- channel_id = try_get(
- api_data,
- (lambda x: x['channel']['globalId'],
- lambda x: x['community']['globalId']))
- channel = try_get(
- api_data,
- (lambda x: x['channel']['name'],
- lambda x: x['community']['name']))
-
- # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
- # in the JSON, which will cause None to be returned instead of {}.
- owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
- uploader_id = str_or_none(
- get_video_info_web(['ch_id', 'user_id'])
- or owner.get('id')
- or channel_id
- )
- uploader = (
- get_video_info_web(['ch_name', 'user_nickname'])
- or owner.get('nickname')
- or channel
- )
+ tags = None
+ if webpage:
+ # use og:video:tag (not logged in)
+ og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
+ tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
+ if not tags:
+ # use keywords and split with comma (not logged in)
+ kwds = self._html_search_meta('keywords', webpage, default=None)
+ if kwds:
+ tags = [x for x in kwds.split(',') if x]
+ if not tags:
+ # find in json (logged in)
+ tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
return {
'id': video_id,
'_api_data': api_data,
- 'title': title,
+ 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
- 'uploader': uploader,
- 'timestamp': timestamp,
- 'uploader_id': uploader_id,
- 'channel': channel,
- 'channel_id': channel_id,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'duration': duration,
- 'webpage_url': webpage_url,
+ 'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta(
+ ('image', 'og:image'), webpage, 'thumbnail', default=None),
+ 'description': clean_html(get_video_info('description')),
+ 'uploader': traverse_obj(api_data, ('owner', 'nickname')),
+ 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
+ self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
+ 'uploader_id': traverse_obj(api_data, ('owner', 'id')),
+ 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
+ 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
+ 'view_count': int_or_none(get_video_info('count', 'view')),
+ 'tags': tags,
+ 'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
+ 'comment_count': get_video_info('count', 'comment', expected_type=int),
+ 'duration': (
+ parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
+ or get_video_info('duration')),
+ 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
+ 'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
}
+ def _get_subtitles(self, video_id, api_data, session_api_data):
+ comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
+ user_id_str = session_api_data.get('serviceUserId')
+
+ thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']]
+ raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
+ if not raw_danmaku:
+ self.report_warning(f'Failed to get comments. {bug_reports_message()}')
+ return
+ return {
+ 'comments': [{
+ 'ext': 'json',
+ 'data': json.dumps(raw_danmaku),
+ }],
+ }
+
+ def _extract_all_comments(self, video_id, threads, user_id, user_key):
+ auth_data = {
+ 'user_id': user_id,
+ 'userkey': user_key,
+ } if user_id and user_key else {'user_id': ''}
+
+ # Request Start
+ post_data = [{'ping': {'content': 'rs:0'}}]
+ for i, thread in enumerate(threads):
+ thread_id = thread['id']
+ thread_fork = thread['fork']
+ # Post Start (2N)
+ post_data.append({'ping': {'content': f'ps:{i * 2}'}})
+ post_data.append({'thread': {
+ 'fork': thread_fork,
+ 'language': 0,
+ 'nicoru': 3,
+ 'scores': 1,
+ 'thread': thread_id,
+ 'version': '20090904',
+ 'with_global': 1,
+ **auth_data,
+ }})
+ # Post Final (2N)
+ post_data.append({'ping': {'content': f'pf:{i * 2}'}})
+
+ # Post Start (2N+1)
+ post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
+ post_data.append({'thread_leaves': {
+ # format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments>'
+ # unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
+ 'content': '0-999999:999999,999999,nicoru:999999',
+ 'fork': thread_fork,
+ 'language': 0,
+ 'nicoru': 3,
+ 'scores': 1,
+ 'thread': thread_id,
+ **auth_data,
+ }})
+ # Post Final (2N+1)
+ post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
+ # Request Final
+ post_data.append({'ping': {'content': 'rf:0'}})
+
+ for api_url in self._COMMENT_API_ENDPOINTS:
+ comments = self._download_json(
+ api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
+ headers={
+ 'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
+ 'Origin': 'https://www.nicovideo.jp',
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ },
+ note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
+ if comments:
+ return comments
+
class NiconicoPlaylistBaseIE(InfoExtractor):
_PAGE_SIZE = 100
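`_extract_all_comments` speaks NND's comment batch protocol: the whole request is bracketed by `rs:`/`rf:` pings, and each thread contributes a full-thread request plus a `thread_leaves` request, each framed by `ps:`/`pf:` pings with even and odd indices respectively. A trimmed, runnable sketch of the packet construction (only a subset of the fields above is kept):

```python
import json

def build_comment_request(thread_ids, user_id=''):
    packet = [{'ping': {'content': 'rs:0'}}]           # Request Start
    for i, thread in enumerate(thread_ids):
        packet += [
            {'ping': {'content': f'ps:{i * 2}'}},      # even: full thread
            {'thread': {'thread': thread, 'version': '20090904',
                        'language': 0, 'user_id': user_id}},
            {'ping': {'content': f'pf:{i * 2}'}},
            {'ping': {'content': f'ps:{i * 2 + 1}'}},  # odd: thread leaves
            {'thread_leaves': {'thread': thread, 'language': 0,
                               'content': '0-999999:999999,999999',
                               'user_id': user_id}},
            {'ping': {'content': f'pf:{i * 2 + 1}'}},
        ]
    packet.append({'ping': {'content': 'rf:0'}})       # Request Final
    return json.dumps(packet).encode()

print(build_comment_request(['1234567890'])[:100])
```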
diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py
index a0546cda0..8bb709cd7 100644
--- a/yt_dlp/extractor/nitter.py
+++ b/yt_dlp/extractor/nitter.py
@@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
parse_count,
- unified_strdate,
unified_timestamp,
remove_end,
determine_ext,
@@ -25,6 +24,16 @@ class NitterIE(InfoExtractor):
'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
'26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
+ 'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
+ 'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
+ 'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
+ 'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
+ 'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
+ 'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
+ 'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
+ 'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
+ 'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
+ 'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',
'nitter.i2p',
'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
@@ -36,28 +45,55 @@ class NitterIE(InfoExtractor):
'nitter.42l.fr',
'nitter.pussthecat.org',
'nitter.nixnet.services',
- 'nitter.mastodont.cat',
- 'nitter.tedomum.net',
'nitter.fdn.fr',
'nitter.1d4.us',
'nitter.kavin.rocks',
- 'tweet.lambda.dance',
- 'nitter.cc',
- 'nitter.vxempire.xyz',
'nitter.unixfox.eu',
'nitter.domain.glass',
- 'nitter.himiko.cloud',
'nitter.eu',
'nitter.namazso.eu',
- 'nitter.mailstation.de',
'nitter.actionsack.com',
- 'nitter.cattube.org',
- 'nitter.dark.fail',
'birdsite.xanny.family',
- 'nitter.40two.app',
- 'nitter.skrep.in',
+ 'nitter.hu',
+ 'twitr.gq',
+ 'nitter.moomoo.me',
+ 'nittereu.moomoo.me',
+ 'bird.from.tf',
+ 'nitter.it',
+ 'twitter.censors.us',
+ 'twitter.grimneko.de',
+ 'nitter.alefvanoon.xyz',
+ 'n.hyperborea.cloud',
+ 'nitter.ca',
+ 'twitter.076.ne.jp',
+ 'twitter.mstdn.social',
+ 'nitter.fly.dev',
+ 'notabird.site',
+ 'nitter.weiler.rocks',
+ 'nitter.silkky.cloud',
+ 'nitter.sethforprivacy.com',
+ 'nttr.stream',
+ 'nitter.cutelab.space',
+ 'nitter.nl',
+ 'nitter.mint.lgbt',
+ 'nitter.bus-hit.me',
+ 'fuckthesacklers.network',
+ 'nitter.govt.land',
+ 'nitter.datatunnel.xyz',
+ 'nitter.esmailelbob.xyz',
+ 'tw.artemislena.eu',
+ 'de.nttr.stream',
+ 'nitter.winscloud.net',
+ 'nitter.tiekoetter.com',
+ 'nitter.spaceint.fr',
+ 'twtr.bch.bar',
+ 'nitter.exonip.de',
+ 'nitter.mastodon.pro',
+ 'nitter.notraxx.ch',
+
# not in the list anymore
+ 'nitter.skrep.in',
'nitter.snopyta.org',
)
@@ -68,96 +104,121 @@ class NitterIE(InfoExtractor):
# official, rate limited
'nitter.net',
# offline
+ 'is-nitter.resolv.ee',
+ 'lu-nitter.resolv.ee',
'nitter.13ad.de',
+ 'nitter.40two.app',
+ 'nitter.cattube.org',
+ 'nitter.cc',
+ 'nitter.dark.fail',
+ 'nitter.himiko.cloud',
+ 'nitter.koyu.space',
+ 'nitter.mailstation.de',
+ 'nitter.mastodont.cat',
+ 'nitter.tedomum.net',
+ 'nitter.tokhmi.xyz',
'nitter.weaponizedhumiliation.com',
+ 'nitter.vxempire.xyz',
+ 'tweet.lambda.dance',
)
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
- _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
- _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
+ _INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})'
+ _VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
current_instance = random.choice(HTTP_INSTANCES)
_TESTS = [
{
# GIF (wrapped in mp4)
- 'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance,
+ 'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
'info_dict': {
'id': '1314279897502629888',
'ext': 'mp4',
- 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
- 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
+ 'title': 'md5:7890a9277da4639ab624dd899424c5d8',
+ 'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Firefox 🔥',
'uploader_id': 'firefox',
- 'uploader_url': 'https://%s/firefox' % current_instance,
+ 'uploader_url': f'https://{current_instance}/firefox',
'upload_date': '20201008',
'timestamp': 1602183720,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
},
}, { # normal video
- 'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance,
+ 'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
'info_dict': {
'id': '1299715685392756737',
'ext': 'mp4',
- 'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+ 'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Le Doc',
+ 'uploader': 're:^Le *Doc',
'uploader_id': 'Le___Doc',
- 'uploader_url': 'https://%s/Le___Doc' % current_instance,
+ 'uploader_url': f'https://{current_instance}/Le___Doc',
'upload_date': '20200829',
- 'timestamp': 1598711341,
+ 'timestamp': 1598711340,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
},
}, { # video embed in a "Streaming Political Ads" box
- 'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance,
+ 'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
'info_dict': {
'id': '1321147074491092994',
'ext': 'mp4',
- 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
- 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
+ 'title': 'md5:8290664aabb43b9189145c008386bf12',
+ 'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Mozilla',
'uploader_id': 'mozilla',
- 'uploader_url': 'https://%s/mozilla' % current_instance,
+ 'uploader_url': f'https://{current_instance}/mozilla',
'upload_date': '20201027',
- 'timestamp': 1603820982
+ 'timestamp': 1603820940,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
},
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
}, { # not the first tweet but main-tweet
- 'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance,
+ 'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
'info_dict': {
- 'id': '1379050895539724290',
+ 'id': '1354848277481414657',
'ext': 'mp4',
- 'title': 'Dorothy Zbornak - This had me hollering!!',
- 'description': 'This had me hollering!!',
+ 'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
+ 'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Dorothy Zbornak',
- 'uploader_id': 'TheNaturalNu',
- 'uploader_url': 'https://%s/TheNaturalNu' % current_instance,
- 'timestamp': 1617626329,
- 'upload_date': '20210405'
+ 'uploader': 'Firefox 🔥',
+ 'uploader_id': 'firefox',
+ 'uploader_url': f'https://{current_instance}/firefox',
+ 'upload_date': '20210128',
+ 'timestamp': 1611855960,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
}
}
]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
parsed_url = compat_urlparse.urlparse(url)
- base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)
+ base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'
self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
- full_webpage = self._download_webpage(url, video_id)
+ full_webpage = webpage = self._download_webpage(url, video_id)
main_tweet_start = full_webpage.find('class="main-tweet"')
if main_tweet_start > 0:
webpage = full_webpage[main_tweet_start:]
- if not webpage:
- webpage = full_webpage
- video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
+ video_url = '%s%s' % (base_url, self._html_search_regex(
+ r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
ext = determine_ext(video_url)
if ext == 'unknown_video':
@@ -168,61 +229,49 @@ class NitterIE(InfoExtractor):
'ext': ext
}]
- title = self._og_search_description(full_webpage)
- if not title:
- title = self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title')
- description = title
+ title = description = self._og_search_description(full_webpage) or self._html_search_regex(
+ r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
- mobj = self._match_valid_url(url)
- uploader_id = (
- mobj.group('uploader_id')
- or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
- )
+ uploader_id = self._html_search_regex(
+ r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id
- if uploader_id:
- uploader_url = '%s/%s' % (base_url, uploader_id)
+ uploader = self._html_search_regex(
+ r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+ if uploader:
+ title = f'{uploader} - {title}'
- uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+ counts = {
+ f'{x[0]}_count': self._html_search_regex(
+ fr'<span[^>]+class="icon-{x[1]}[^>]*></span>([^<]*)</div>',
+ webpage, f'{x[0]} count', fatal=False)
+ for x in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment'))
+ }
+ counts = {field: 0 if count == '' else parse_count(count) for field, count in counts.items()}
- if uploader:
- title = '%s - %s' % (uploader, title)
-
- view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
- like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
- repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
- comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
-
- thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
- if not thumbnail:
- thumbnail = '%s%s' % (base_url, self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
- thumbnail = remove_end(thumbnail, '%3Asmall')
-
- thumbnails = []
- thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig')
- for id in thumbnail_ids:
- thumbnails.append({
- 'id': id,
- 'url': thumbnail + '%3A' + id,
- })
-
- date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
- upload_date = unified_strdate(date)
- timestamp = unified_timestamp(date)
+ thumbnail = (
+ self._html_search_meta('og:image', full_webpage, 'thumbnail url')
+ or remove_end('%s%s' % (base_url, self._html_search_regex(
+ r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall'))
+
+ thumbnails = [
+ {'id': id, 'url': f'{thumbnail}%3A{id}'}
+ for id in ('thumb', 'small', 'large', 'medium', 'orig')
+ ]
+
+ date = self._html_search_regex(
+ r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
+ webpage, 'upload date', default='').replace('·', '')
return {
'id': video_id,
'title': title,
'description': description,
'uploader': uploader,
- 'timestamp': timestamp,
+ 'timestamp': unified_timestamp(date),
'uploader_id': uploader_id,
- 'uploader_url': uploader_url,
- 'view_count': view_count,
- 'like_count': like_count,
- 'repost_count': repost_count,
- 'comment_count': comment_count,
+ 'uploader_url': f'{base_url}/{uploader_id}',
'formats': formats,
'thumbnails': thumbnails,
'thumbnail': thumbnail,
- 'upload_date': upload_date,
+ **counts,
}
diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py
index 89380d039..68c8c8e52 100644
--- a/yt_dlp/extractor/njpwworld.py
+++ b/yt_dlp/extractor/njpwworld.py
@@ -43,15 +43,7 @@ class NJPWWorldIE(InfoExtractor):
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- # No authentication to be performed
- if not username:
- return True
-
+ def _perform_login(self, username, password):
# Setup session (will set necessary cookies)
self._request_webpage(
'https://njpwworld.com/', None, note='Setting up session')
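
This njpwworld hunk is the template for most of the commit: extractors used to carry their own _real_initialize/_login pair that fetched credentials and bailed when none were given; that boilerplate moves into the framework, and extractors now only override _perform_login (plus _initialize_pre_login where setup must run before authentication). A rough sketch of how the base class is assumed to wire these hooks together; the actual dispatch lives in the yt_dlp/extractor/common.py part of this commit, not shown here:

    class InfoExtractor:
        _NETRC_MACHINE = None

        def _get_login_info(self):
            # resolves --username/--password or .netrc; stubbed for the sketch
            return None, None

        def _initialize_pre_login(self):
            """Setup that must happen before login, e.g. fetching API keys."""

        def _perform_login(self, username, password):
            """Overridden by extractors that support authentication."""

        def _real_initialize(self):
            """Post-login setup, e.g. raising login_required when unauthenticated."""

        def initialize(self):
            self._initialize_pre_login()
            username, password = self._get_login_info()
            if username:
                self._perform_login(username, password)
            self._real_initialize()
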
diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py
index 78c4952f4..28af909d5 100644
--- a/yt_dlp/extractor/noco.py
+++ b/yt_dlp/extractor/noco.py
@@ -61,14 +61,7 @@ class NocoIE(InfoExtractor):
}
]
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login = self._download_json(
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata({
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
index 36927009d..fe4740aae 100644
--- a/yt_dlp/extractor/openload.py
+++ b/yt_dlp/extractor/openload.py
@@ -207,7 +207,7 @@ class PhantomJSwrapper(object):
replaces = self.options
replaces['url'] = url
- user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent']
+ user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
replaces['ua'] = user_agent.replace('"', '\\"')
replaces['jscode'] = jscode
diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py
index c06fca795..62c52cd6e 100644
--- a/yt_dlp/extractor/packtpub.py
+++ b/yt_dlp/extractor/packtpub.py
@@ -47,10 +47,7 @@ class PacktPubIE(PacktPubBaseIE):
_NETRC_MACHINE = 'packtpub'
_TOKEN = None
- def _real_initialize(self):
- username, password = self._get_login_info()
- if username is None:
- return
+ def _perform_login(self, username, password):
try:
self._TOKEN = self._download_json(
'https://services.packtpub.com/auth-v1/users/tokens', None,
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
index d458dfe50..3388f7f39 100644
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@@ -18,12 +18,39 @@ from ..utils import (
int_or_none,
OnDemandPagedList,
parse_qs,
+ srt_subtitles_timecode,
traverse_obj,
)
class PanoptoBaseIE(InfoExtractor):
- BASE_URL_RE = r'(?P<base_url>https?://[\w.]+\.panopto.(?:com|eu)/Panopto)'
+ BASE_URL_RE = r'(?P<base_url>https?://[\w.-]+\.panopto.(?:com|eu)/Panopto)'
+
+ # see panopto core.js
+ _SUB_LANG_MAPPING = {
+ 0: 'en-US',
+ 1: 'en-GB',
+ 2: 'es-MX',
+ 3: 'es-ES',
+ 4: 'de-DE',
+ 5: 'fr-FR',
+ 6: 'nl-NL',
+ 7: 'th-TH',
+ 8: 'zh-CN',
+ 9: 'zh-TW',
+ 10: 'ko-KR',
+ 11: 'ja-JP',
+ 12: 'ru-RU',
+ 13: 'pt-PT',
+ 14: 'pl-PL',
+ 15: 'en-AU',
+ 16: 'da-DK',
+ 17: 'fi-FI',
+ 18: 'hu-HU',
+ 19: 'nb-NO',
+ 20: 'sv-SE',
+ 21: 'it-IT'
+ }
def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
response = self._download_json(
@@ -31,7 +58,7 @@ class PanoptoBaseIE(InfoExtractor):
fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
if not response:
return
- error_code = response.get('ErrorCode')
+ error_code = traverse_obj(response, 'ErrorCode')
if error_code == 2:
self.raise_login_required(method='cookies')
elif error_code is not None:
@@ -62,10 +89,11 @@ class PanoptoIE(PanoptoBaseIE):
'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
'title': 'Panopto for Business - Use Cases',
'timestamp': 1459184200,
- 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+',
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
'upload_date': '20160328',
'ext': 'mp4',
'cast': [],
+ 'chapters': [],
'duration': 88.17099999999999,
'average_rating': int,
'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
@@ -80,10 +108,10 @@ class PanoptoIE(PanoptoBaseIE):
'title': 'Overcoming Top 4 Challenges of Enterprise Video',
'uploader': 'Panopto Support',
'timestamp': 1449409251,
- 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+',
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
'upload_date': '20151206',
'ext': 'mp4',
- 'chapters': 'count:21',
+ 'chapters': 'count:12',
'cast': ['Panopto Support'],
'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
'average_rating': int,
@@ -104,8 +132,9 @@ class PanoptoIE(PanoptoBaseIE):
'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
'timestamp': 1569845768,
'tags': ['Viewer', 'Enterprise'],
+ 'chapters': [],
'upload_date': '20190930',
- 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+',
+ 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/.+',
'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
'title': 'Getting Started: View a Video',
'average_rating': int,
@@ -121,6 +150,7 @@ class PanoptoIE(PanoptoBaseIE):
'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
'ext': 'mp4',
'cast': ['LTS CLI Script'],
+ 'chapters': [],
'duration': 2178.45,
'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
@@ -129,12 +159,78 @@ class PanoptoIE(PanoptoBaseIE):
'uploader': 'LTS CLI Script',
'timestamp': 1572458134,
'title': 'WW2 Vets Interview 3 Ronald Stanley George',
- 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+',
+ 'thumbnail': r're:https://unisa\.au\.panopto\.com/.+',
'channel': 'World War II Veteran Interviews',
'upload_date': '20191030',
},
},
{
+ # Slides/storyboard
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
+ 'info_dict': {
+ 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
+ 'ext': 'mhtml',
+ 'timestamp': 1448798857,
+ 'duration': 4712.681,
+ 'title': 'Cache Memory - CompSci 15-213, Lecture 12',
+ 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+ 'upload_date': '20151129',
+ 'average_rating': 0,
+ 'uploader': 'Panopto Support',
+ 'channel': 'Showcase Videos',
+ 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
+ 'cast': ['ISR Videographer', 'Panopto Support'],
+ 'chapters': 'count:28',
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
+ },
+ 'params': {'format': 'mhtml', 'skip_download': True}
+ },
+ {
+ 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
+ 'info_dict': {
+ 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
+ 'ext': 'mp4',
+ 'chapters': [],
+ 'title': 'Company Policy',
+ 'average_rating': 0,
+ 'timestamp': 1615058901,
+ 'channel': 'Human Resources',
+ 'tags': ['HumanResources'],
+ 'duration': 1604.243,
+ 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
+ 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
+ 'uploader': 'Cait M.',
+ 'upload_date': '20210306',
+ 'cast': ['Cait M.'],
+ 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}],
+ 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]},
+ },
+ 'params': {'writesubtitles': True, 'skip_download': True}
+ }, {
+ # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped.
+ 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
+ 'info_dict': {
+ 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
+ 'ext': 'mp4',
+ 'subtitles': 'count:1',
+ 'title': 'HR Benefits Review Meeting*',
+ 'cast': ['Panopto Support'],
+ 'chapters': [],
+ 'timestamp': 1575024251,
+ 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
+ 'channel': 'Zoom',
+ 'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
+ 'uploader': 'Panopto Support',
+ 'average_rating': 0,
+ 'duration': 409.34499999999997,
+ 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
+ 'upload_date': '20191129',
+
+ },
+ 'params': {'writesubtitles': True, 'skip_download': True}
+ },
+ {
'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
'only_matching': True
},
@@ -178,19 +274,82 @@ class PanoptoIE(PanoptoBaseIE):
note='Marking watched', errnote='Unable to mark watched')
@staticmethod
- def _extract_chapters(delivery):
+ def _extract_chapters(timestamps):
chapters = []
- for timestamp in delivery.get('Timestamps', []):
+ for timestamp in timestamps or []:
+ caption = timestamp.get('Caption')
start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration'))
- if start is None or duration is None:
+ if not caption or start is None or duration is None:
continue
chapters.append({
'start_time': start,
'end_time': start + duration,
- 'title': timestamp.get('Caption')
+ 'title': caption
})
return chapters
+ @staticmethod
+ def _extract_mhtml_formats(base_url, timestamps):
+ image_frags = {}
+ for timestamp in timestamps or []:
+ duration = timestamp.get('Duration')
+ obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber'),
+ if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None:
+ image_frags.setdefault('slides', []).append({
+ 'url': base_url + f'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}',
+ 'duration': duration
+ })
+
+ obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime')
+ if None not in (obj_pid, session_id, abs_time):
+ image_frags.setdefault('chapter', []).append({
+ 'url': base_url + f'/Pages/Viewer/Thumb.aspx?eventTargetPID={obj_pid}&sessionPID={session_id}&number={obj_sn}&isPrimary=false&absoluteTime={abs_time}',
+ 'duration': duration,
+ })
+ for name, fragments in image_frags.items():
+ yield {
+ 'format_id': name,
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'url': 'about:invalid',
+ 'fragments': fragments
+ }
+
+ @staticmethod
+ def _json2srt(data, delivery):
+ def _gen_lines():
+ for i, line in enumerate(data):
+ start_time = line['Time']
+ duration = line.get('Duration')
+ if duration:
+ end_time = start_time + duration
+ else:
+ end_time = traverse_obj(data, (i + 1, 'Time')) or delivery['Duration']
+ yield f'{i + 1}\n{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n{line["Caption"]}'
+ return '\n\n'.join(_gen_lines())
+
+ def _get_subtitles(self, base_url, video_id, delivery):
+ subtitles = {}
+ for lang in delivery.get('AvailableLanguages') or []:
+ response = self._call_api(
+ base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, fatal=False,
+ note='Downloading captions JSON metadata', query={
+ 'deliveryId': video_id,
+ 'getCaptions': True,
+ 'language': str(lang),
+ 'responseType': 'json'
+ }
+ )
+ if not isinstance(response, list):
+ continue
+ subtitles.setdefault(self._SUB_LANG_MAPPING.get(lang) or 'default', []).append({
+ 'ext': 'srt',
+ 'data': self._json2srt(response, delivery),
+ })
+ return subtitles
+
def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs):
formats = []
subtitles = {}
@@ -240,6 +399,7 @@ class PanoptoIE(PanoptoBaseIE):
delivery = delivery_info['Delivery']
session_start_time = int_or_none(delivery.get('SessionStartTime'))
+ timestamps = delivery.get('Timestamps')
# Podcast stream is usually the combined streams. We will prefer that by default.
podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles(
@@ -249,9 +409,11 @@ class PanoptoIE(PanoptoBaseIE):
video_id, delivery.get('Streams'), preference=-10)
formats = podcast_formats + streams_formats
- subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles)
- self._sort_formats(formats)
+ formats.extend(self._extract_mhtml_formats(base_url, timestamps))
+ subtitles = self._merge_subtitles(
+ podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery))
+ self._sort_formats(formats)
self.mark_watched(base_url, video_id, delivery_info)
return {
@@ -262,7 +424,7 @@ class PanoptoIE(PanoptoBaseIE):
'duration': delivery.get('Duration'),
'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
'average_rating': delivery.get('AverageRating'),
- 'chapters': self._extract_chapters(delivery) or None,
+ 'chapters': self._extract_chapters(timestamps),
'uploader': delivery.get('OwnerDisplayName') or None,
'uploader_id': delivery.get('OwnerId'),
'description': delivery.get('SessionAbstract'),
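
_json2srt above derives each cue's end time from its own Duration when present, from the next cue's start otherwise, and from the overall delivery duration for the last cue. A self-contained sketch of the same conversion, with srt_timecode standing in for yt_dlp.utils.srt_subtitles_timecode:

    def srt_timecode(seconds):
        # 'HH:MM:SS,mmm' as required by SRT
        ms = int(round(seconds * 1000))
        h, ms = divmod(ms, 3_600_000)
        m, ms = divmod(ms, 60_000)
        s, ms = divmod(ms, 1_000)
        return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'

    def json2srt(lines, total_duration):
        cues = []
        for i, line in enumerate(lines):
            start = line['Time']
            end = start + line['Duration'] if line.get('Duration') else (
                lines[i + 1]['Time'] if i + 1 < len(lines) else total_duration)
            cues.append(f'{i + 1}\n{srt_timecode(start)} --> {srt_timecode(end)}\n{line["Caption"]}')
        return '\n\n'.join(cues)

    print(json2srt([{'Time': 0, 'Caption': 'Hello'},
                    {'Time': 2.5, 'Duration': 1.5, 'Caption': 'World'}], 5))
    # 1
    # 00:00:00,000 --> 00:00:02,500
    # Hello
    #
    # 2
    # 00:00:02,500 --> 00:00:04,000
    # World
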
diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py
index 17138985a..94a9319ea 100644
--- a/yt_dlp/extractor/paramountplus.py
+++ b/yt_dlp/extractor/paramountplus.py
@@ -1,4 +1,5 @@
from __future__ import unicode_literals
+import itertools
from .common import InfoExtractor
from .cbs import CBSBaseIE
@@ -13,12 +14,12 @@ class ParamountPlusIE(CBSBaseIE):
(?:
paramountplus:|
https?://(?:www\.)?(?:
- paramountplus\.com/(?:shows/[^/]+/video|movies/[^/]+)/
+ paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/
)(?P<id>[\w-]+))'''
# All tests are blocked outside US
_TESTS = [{
- 'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
+ 'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
'info_dict': {
'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
'ext': 'mp4',
@@ -33,7 +34,7 @@ class ParamountPlusIE(CBSBaseIE):
'skip_download': 'm3u8',
},
}, {
- 'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
+ 'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
'info_dict': {
'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
'ext': 'mp4',
@@ -48,7 +49,7 @@ class ParamountPlusIE(CBSBaseIE):
'skip_download': 'm3u8',
},
}, {
- 'url': 'https://www.paramountplus.com/movies/daddys-home/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
+ 'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
'info_dict': {
'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
'ext': 'mp4',
@@ -63,7 +64,7 @@ class ParamountPlusIE(CBSBaseIE):
},
'expected_warnings': ['Ignoring subtitle tracks'], # TODO: Investigate this
}, {
- 'url': 'https://www.paramountplus.com/movies/sonic-the-hedgehog/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
+ 'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
'info_dict': {
'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
'ext': 'mp4',
@@ -78,10 +79,16 @@ class ParamountPlusIE(CBSBaseIE):
},
'expected_warnings': ['Ignoring subtitle tracks'],
}, {
- 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+ 'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
'only_matching': True,
}, {
- 'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq',
+ 'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
'only_matching': True,
}]
@@ -128,11 +135,13 @@ class ParamountPlusSeriesIE(InfoExtractor):
'id': 'spongebob-squarepants',
}
}]
- _API_URL = 'https://www.paramountplus.com/shows/{}/xhr/episodes/page/0/size/100000/xs/0/season/0/'
def _entries(self, show_name):
- show_json = self._download_json(self._API_URL.format(show_name), video_id=show_name)
- if show_json.get('success'):
+ for page in itertools.count():
+ show_json = self._download_json(
+ f'https://www.paramountplus.com/shows/{show_name}/xhr/episodes/page/{page}/size/50/xs/0/season/0', show_name)
+ if not show_json.get('success'):
+ return
for episode in show_json['result']['data']:
yield self.url_result(
'https://www.paramountplus.com%s' % episode['url'],
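
The series extractor previously fetched a single page with size/100000; it now walks pages of 50 via itertools.count() and stops at the first response whose success flag is falsy. The termination pattern in isolation (fetch_page is a stub, not the real endpoint):

    import itertools

    PAGES = [  # stand-in for the paged episodes endpoint
        {'success': True, 'result': {'data': [{'url': '/a'}, {'url': '/b'}]}},
        {'success': True, 'result': {'data': [{'url': '/c'}]}},
        {'success': False},
    ]

    def fetch_page(page):
        return PAGES[page] if page < len(PAGES) else {'success': False}

    def entries():
        for page in itertools.count():
            show_json = fetch_page(page)
            if not show_json.get('success'):
                return  # first unsuccessful page closes the generator
            for episode in show_json['result']['data']:
                yield 'https://www.paramountplus.com' + episode['url']

    print(list(entries()))
    # ['https://www.paramountplus.com/a', 'https://www.paramountplus.com/b',
    #  'https://www.paramountplus.com/c']
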
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index d3ee071e0..963a0d6fb 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -88,11 +88,7 @@ class PatreonIE(InfoExtractor):
# Currently Patreon exposes download URL via hidden CSS, so login is not
# needed. Keeping this commented for when this inevitably changes.
'''
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_form = {
'redirectUrl': 'http://www.patreon.com/',
'email': username,
@@ -108,8 +104,6 @@ class PatreonIE(InfoExtractor):
if re.search(r'onLoginFailed', login_page):
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
- def _real_initialize(self):
- self._login()
'''
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
index 497e1edbc..c4eb4913f 100644
--- a/yt_dlp/extractor/piapro.py
+++ b/yt_dlp/extractor/piapro.py
@@ -29,13 +29,9 @@ class PiaproIE(InfoExtractor):
}
}]
- def _real_initialize(self):
- self._login_status = self._login()
+ _login_status = False
- def _login(self):
- username, password = self._get_login_info()
- if not username:
- return False
+ def _perform_login(self, username, password):
login_ok = True
login_form_strs = {
'_username': username,
@@ -57,7 +53,7 @@ class PiaproIE(InfoExtractor):
if not login_ok:
self.report_warning(
'unable to log in: bad username or password')
- return login_ok
+ self._login_status = login_ok
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py
index 23c8256b5..17f52e7f4 100644
--- a/yt_dlp/extractor/platzi.py
+++ b/yt_dlp/extractor/platzi.py
@@ -22,14 +22,7 @@ class PlatziBaseIE(InfoExtractor):
_LOGIN_URL = 'https://platzi.com/login/'
_NETRC_MACHINE = 'platzi'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py
index fd72a3717..cad2c3a0f 100644
--- a/yt_dlp/extractor/playplustv.py
+++ b/yt_dlp/extractor/playplustv.py
@@ -38,14 +38,10 @@ class PlayPlusTVIE(InfoExtractor):
'Authorization': 'Bearer ' + self._token,
}, query=query)
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- self.raise_login_required()
-
+ def _perform_login(self, username, password):
req = PUTRequest(
'https://api.playplus.tv/api/web/login', json.dumps({
- 'email': email,
+ 'email': username,
'password': password,
}).encode(), {
'Content-Type': 'application/json; charset=utf-8',
@@ -61,6 +57,10 @@ class PlayPlusTVIE(InfoExtractor):
self._profile = self._call_api('Profiles')['list'][0]['_id']
+ def _real_initialize(self):
+ if not self._token:
+ self.raise_login_required(method='password')
+
def _real_extract(self, url):
project_id, media_id = self._match_valid_url(url).groups()
media = self._call_api(
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py
index 801057ee1..2a5e0e488 100644
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -162,14 +162,7 @@ query viewClip {
}
}'''
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py
index d27031c91..c9e2fed12 100644
--- a/yt_dlp/extractor/pokergo.py
+++ b/yt_dlp/extractor/pokergo.py
@@ -15,11 +15,9 @@ class PokerGoBaseIE(InfoExtractor):
_AUTH_TOKEN = None
_PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d'
- def _login(self):
- username, password = self._get_login_info()
- if not username:
- self.raise_login_required(method='password')
-
+ def _perform_login(self, username, password):
+ if self._AUTH_TOKEN:
+ return
self.report_login()
PokerGoBaseIE._AUTH_TOKEN = self._download_json(
f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None,
@@ -30,7 +28,7 @@ class PokerGoBaseIE(InfoExtractor):
def _real_initialize(self):
if not self._AUTH_TOKEN:
- self._login()
+ self.raise_login_required(method='password')
class PokerGoIE(PokerGoBaseIE):
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 652fdd116..a55dd4f8b 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -21,10 +21,7 @@ class RoosterTeethBaseIE(InfoExtractor):
_API_BASE = 'https://svod-be.roosterteeth.com'
_API_BASE_URL = f'{_API_BASE}/api/v1'
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
+ def _perform_login(self, username, password):
if self._get_cookies(self._API_BASE_URL).get('rt_access_token'):
return
@@ -47,9 +44,6 @@ class RoosterTeethBaseIE(InfoExtractor):
msg += ': ' + error
self.report_warning(msg)
- def _real_initialize(self):
- self._login()
-
def _extract_video_info(self, data):
thumbnails = []
for image in traverse_obj(data, ('included', 'images')):
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 49c1f4485..a0d5f88d9 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
try_get,
+ unescapeHTML,
ExtractorError,
)
@@ -28,6 +29,20 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20191020',
}
}, {
+ 'url': 'https://rumble.com/embed/vslb7v',
+ 'md5': '7418035de1a30a178b8af34dc2b6a52b',
+ 'info_dict': {
+ 'id': 'vslb7v',
+ 'ext': 'mp4',
+ 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'',
+ 'timestamp': 1645142135,
+ 'upload_date': '20220217',
+ 'channel_url': 'https://rumble.com/c/CyberTechNews',
+ 'channel': 'CTNews',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
+ 'duration': 901,
+ }
+ }, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True,
}]
@@ -45,7 +60,7 @@ class RumbleEmbedIE(InfoExtractor):
video = self._download_json(
'https://rumble.com/embedJS/', video_id,
query={'request': 'video', 'v': video_id})
- title = video['title']
+ title = unescapeHTML(video['title'])
formats = []
for height, ua in (video.get('ua') or {}).items():
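
Wrapping the title in unescapeHTML matters because the embedJS payload can carry HTML entities, as the apostrophes in the new test title suggest. For such cases the standard library gives the same result (the raw value below is an assumption about what the API returns):

    import html

    raw = 'Defense Sec. says US Commitment to NATO Defense &#39;Ironclad&#39;'
    print(html.unescape(raw))
    # Defense Sec. says US Commitment to NATO Defense 'Ironclad'
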
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index 66ac32deb..0ea8253fa 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -181,7 +181,6 @@ class RUTVIE(InfoExtractor):
'rtmp_live': True,
'ext': 'flv',
'vbr': str_to_int(quality),
- 'quality': preference,
}
elif transport == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@@ -192,9 +191,10 @@ class RUTVIE(InfoExtractor):
'url': url
}
fmt.update({
- 'width': width,
- 'height': height,
+ 'width': int_or_none(quality, default=height, invscale=width, scale=height),
+ 'height': int_or_none(quality, default=height),
'format_id': '%s-%s' % (transport, quality),
+ 'source_preference': preference,
})
formats.append(fmt)
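
The rutv change derives dimensions from the quality label while keeping the declared aspect ratio: int_or_none with scale/invscale evaluates to roughly int(v) * invscale // scale. A worked example, assuming a manifest that declares 1280x720 and a quality label of '480':

    def int_or_none(v, scale=1, default=None, invscale=1):
        # simplified version of yt_dlp.utils.int_or_none
        try:
            return int(v) * invscale // scale
        except (TypeError, ValueError):
            return default

    quality, width, height = '480', 1280, 720
    print(int_or_none(quality, default=height))                                 # 480
    print(int_or_none(quality, default=height, invscale=width, scale=height))   # 853
    # 853x480 preserves the 16:9 ratio of the declared 1280x720
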
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
index cca4464ca..7b4571daa 100644
--- a/yt_dlp/extractor/safari.py
+++ b/yt_dlp/extractor/safari.py
@@ -25,14 +25,7 @@ class SafariBaseIE(InfoExtractor):
LOGGED_IN = False
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
_, urlh = self._download_webpage_handle(
'https://learning.oreilly.com/accounts/login-check/', None,
'Downloading login page')
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
index ca1de63b6..7215cf5d1 100644
--- a/yt_dlp/extractor/scte.py
+++ b/yt_dlp/extractor/scte.py
@@ -14,14 +14,7 @@ class SCTEBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
_NETRC_MACHINE = 'scte'
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_popup = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index 42de41a11..ab45d9ce4 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -79,16 +79,12 @@ class ShahidIE(ShahidBaseIE):
'only_matching': True
}]
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
-
+ def _perform_login(self, username, password):
try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
- 'email': email,
+ 'email': username,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
index a5026b2e0..5b6849fc9 100644
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -75,9 +75,12 @@ class SonyLIVIE(InfoExtractor):
t[i] = '{:x}'.format(3 & n | 8)
return ''.join(t) + '-' + str(int(time.time() * 1000))
- def _login(self, username, password):
+ def _perform_login(self, username, password):
+ self._HEADERS['device_id'] = self._get_device_id()
+ self._HEADERS['content-type'] = 'application/json'
+
if username.lower() == 'token' and len(password) > 1198:
- return password
+ self._HEADERS['authorization'] = password
elif len(username) != 10 or not username.isdigit():
raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')
@@ -99,7 +102,7 @@ class SonyLIVIE(InfoExtractor):
None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS)
if otp_verify_json['resultCode'] == 'KO':
raise ExtractorError(otp_request_json['message'], expected=True)
- return otp_verify_json['resultObj']['accessToken']
+ self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']
def _call_api(self, version, path, video_id):
try:
@@ -118,13 +121,8 @@ class SonyLIVIE(InfoExtractor):
raise ExtractorError(message)
raise
- def _real_initialize(self):
+ def _initialize_pre_login(self):
self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)
- username, password = self._get_login_info()
- if username:
- self._HEADERS['device_id'] = self._get_device_id()
- self._HEADERS['content-type'] = 'application/json'
- self._HEADERS['authorization'] = self._login(username, password)
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 64b8a71b6..bbc79c2be 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -107,30 +107,24 @@ class SoundcloudBaseIE(InfoExtractor):
return False
raise
- def _real_initialize(self):
+ def _initialize_pre_login(self):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- if username == 'oauth' and password is not None:
- self._access_token = password
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- payload = {'session': {'access_token': self._access_token}}
- token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
- if response is not False:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
- self.report_login()
- else:
- self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
- elif username is not None:
+
+ def _perform_login(self, username, password):
+ if username != 'oauth':
self.report_warning(
'Login using username and password is not currently supported. '
'Use "--username oauth --password <oauth_token>" to login using an oauth token')
+ self._access_token = password
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ payload = {'session': {'access_token': self._access_token}}
+ token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
+ if response is not False:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ self.report_login()
+ else:
+ self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
r'''
def genDevId():
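
One side effect of the soundcloud rewrite: previously a non-'oauth' username only produced the warning and skipped login entirely; now the method falls through either way and still tries the given password as an access token. In outline (the returned header is what a verified token becomes per the hunk above):

    def perform_login(username, password):
        if username != 'oauth':
            print('Login using username and password is not currently supported. '
                  'Use "--username oauth --password <oauth_token>" to login using an oauth token')
        # falls through either way: the password is treated as the token
        access_token = password
        return {'Authorization': 'OAuth ' + access_token}

    print(perform_login('oauth', 'xyzzy'))  # {'Authorization': 'OAuth xyzzy'}
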
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 37eae82bc..232eaa521 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -40,8 +40,7 @@ class TeachableBaseIE(InfoExtractor):
if self._logged_in:
return
- username, password = self._get_login_info(
- netrc_machine=self._SITES.get(site, site))
+ username, password = self._get_login_info(netrc_machine=self._SITES.get(site, site))
if username is None:
return
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
index d347e97ef..64522ec4c 100644
--- a/yt_dlp/extractor/teamtreehouse.py
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -51,17 +51,14 @@ class TeamTreeHouseIE(InfoExtractor):
}]
_NETRC_MACHINE = 'teamtreehouse'
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
+ def _perform_login(self, username, password):
signin_page = self._download_webpage(
'https://teamtreehouse.com/signin',
None, 'Downloading signin page')
data = self._form_hidden_inputs('new_user_session', signin_page)
data.update({
- 'user_session[email]': email,
+ 'user_session[email]': username,
'user_session[password]': password,
})
error_message = get_element_by_class('error-message', self._download_webpage(
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index a39a2fc60..58fdecebe 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -30,11 +30,9 @@ class TennisTVIE(InfoExtractor):
'skip': 'Requires email and password of a subscribed account',
}
_NETRC_MACHINE = 'tennistv'
+ _session_token = None
- def _login(self):
- username, password = self._get_login_info()
- if not username or not password:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ def _perform_login(self, username, password):
login_form = {
'Email': username,
@@ -63,7 +61,8 @@ class TennisTVIE(InfoExtractor):
self._session_token = login_result['sessionToken']
def _real_initialize(self):
- self._login()
+ if not self._session_token:
+ raise self.raise_login_required('Login info is needed for this website', method='password')
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py
index 6c84c211c..1d5da1040 100644
--- a/yt_dlp/extractor/toutv.py
+++ b/yt_dlp/extractor/toutv.py
@@ -40,17 +40,14 @@ class TouTvIE(RadioCanadaIE):
}]
_CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4'
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
+ def _perform_login(self, username, password):
try:
self._access_token = self._download_json(
'https://services.radio-canada.ca/toutv/profiling/accounts/login',
None, 'Logging in', data=json.dumps({
'ClientId': self._CLIENT_KEY,
'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
- 'Email': email,
+ 'Email': username,
'Password': password,
'Scope': 'id.write media-validation.read',
}).encode(), headers={
diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py
index e9b66ec77..31feb9a70 100644
--- a/yt_dlp/extractor/tubitv.py
+++ b/yt_dlp/extractor/tubitv.py
@@ -54,10 +54,7 @@ class TubiTvIE(InfoExtractor):
},
}]
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
+ def _perform_login(self, username, password):
self.report_login()
form_data = {
'username': username,
@@ -72,9 +69,6 @@ class TubiTvIE(InfoExtractor):
raise ExtractorError(
'Login failed (invalid username/password)', expected=True)
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py
index a3e0e15f2..8086f613d 100644
--- a/yt_dlp/extractor/tumblr.py
+++ b/yt_dlp/extractor/tumblr.py
@@ -247,11 +247,7 @@ class TumblrIE(InfoExtractor):
_ACCESS_TOKEN = None
- def _real_initialize(self):
- self.get_access_token()
- self._login()
-
- def get_access_token(self):
+ def _initialize_pre_login(self):
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page', fatal=False)
if login_page:
@@ -260,11 +256,7 @@ class TumblrIE(InfoExtractor):
if not self._ACCESS_TOKEN:
self.report_warning('Failed to get access token; metadata will be missing and some videos may not work')
- def _login(self):
- username, password = self._get_login_info()
- if not username:
- return
-
+ def _perform_login(self, username, password):
if not self._ACCESS_TOKEN:
return
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index bee26c3a3..10de74c8e 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -57,14 +57,7 @@ class TwitchBaseIE(InfoExtractor):
'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
}
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
def fail(message):
raise ExtractorError(
'Unable to login. Twitch said: %s' % message, expected=True)
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
index 25b28e98e..235f89713 100644
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -168,14 +168,7 @@ class UdemyIE(InfoExtractor):
self._handle_error(response)
return response
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_popup = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py
index 4e57a52d1..d87bb5b47 100644
--- a/yt_dlp/extractor/veo.py
+++ b/yt_dlp/extractor/veo.py
@@ -6,13 +6,14 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
mimetype2ext,
+ str_or_none,
unified_timestamp,
url_or_none,
)
class VeoIE(InfoExtractor):
- _VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-]+)'
+ _VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-_]+)'
_TESTS = [{
'url': 'https://app.veo.co/matches/20201027-last-period/',
@@ -24,7 +25,11 @@ class VeoIE(InfoExtractor):
'upload_date': '20201028',
'timestamp': 1603847208,
'duration': 1916,
+ 'view_count': int,
}
+ }, {
+ 'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -36,39 +41,41 @@ class VeoIE(InfoExtractor):
video_data = self._download_json(
'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data')
- title = metadata.get('title')
- thumbnail = url_or_none(metadata.get('thumbnail'))
-
- timestamp = unified_timestamp(metadata.get('created'))
- duration = int_or_none(metadata.get('duration'))
- view_count = int_or_none(metadata.get('view_count'))
-
formats = []
for fmt in video_data:
- mimetype = fmt.get('mime_type')
+ mimetype = str_or_none(fmt.get('mime_type'))
+ format_url = url_or_none(fmt.get('url'))
# skip configuration file for panoramic video
- if mimetype == 'video/mp2t':
+ if not format_url or mimetype == 'video/mp2t':
continue
+
height = int_or_none(fmt.get('height'))
- bitrate = int_or_none(fmt.get('bit_rate'), scale=1000)
- render_type = fmt.get('render_type')
+ render_type = str_or_none(fmt.get('render_type'))
+ format_id = f'{render_type}-{height}p' if render_type and height else None
+
+ # Veo returns panoramic video information even if panoramic video is not available.
+ # e.g. https://app.veo.co/matches/20201027-last-period/
+ if render_type == 'panorama':
+ if not self._is_valid_url(format_url, video_id, format_id):
+ continue
+
formats.append({
- 'url': url_or_none(fmt.get('url')),
- 'format_id': '%s-%sp' % (render_type, height),
+ 'url': format_url,
+ 'format_id': format_id,
'ext': mimetype2ext(mimetype),
'width': int_or_none(fmt.get('width')),
'height': height,
- 'vbr': bitrate
+ 'vbr': int_or_none(fmt.get('bit_rate'), scale=1000),
})
self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
+ 'title': str_or_none(metadata.get('title')),
'formats': formats,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'duration': duration
+ 'thumbnail': url_or_none(metadata.get('thumbnail')),
+ 'timestamp': unified_timestamp(metadata.get('created')),
+ 'view_count': int_or_none(metadata.get('view_count')),
+ 'duration': int_or_none(metadata.get('duration')),
}
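
Per the comment in the hunk, Veo advertises a panorama rendition even when the file does not exist, so panorama entries are now probed with _is_valid_url before being listed. A rough standalone equivalent of such a probe; the real helper goes through the extractor's HTTP stack and error reporting:

    import urllib.error
    import urllib.request

    def is_valid_url(url, timeout=5):
        # probe the URL before advertising it as a format
        try:
            with urllib.request.urlopen(
                    urllib.request.Request(url, method='HEAD'), timeout=timeout) as resp:
                return resp.status < 400
        except (urllib.error.URLError, OSError):
            return False
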
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
index e99dbdefa..6bfb8d442 100644
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@@ -23,11 +23,7 @@ class VidioBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.vidio.com/users/login'
_NETRC_MACHINE = 'vidio'
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
def is_logged_in():
res = self._download_json(
'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
@@ -63,10 +59,9 @@ class VidioBaseIE(InfoExtractor):
'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
raise ExtractorError('Unable to log in')
- def _real_initialize(self):
+ def _initialize_pre_login(self):
self._api_key = self._download_json(
'https://www.vidio.com/auth', None, data=b'')['api_key']
- self._login()
def _call_api(self, url, video_id, note=None):
return self._download_json(url, video_id, note=note, headers={
diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py
index 5b558d890..4627f66fd 100644
--- a/yt_dlp/extractor/viewlift.py
+++ b/yt_dlp/extractor/viewlift.py
@@ -36,9 +36,6 @@ class ViewLiftBaseIE(InfoExtractor):
def _fetch_token(self, site, url):
if self._TOKENS.get(site):
return
- email, password = self._get_login_info(netrc_machine=site)
- if email:
- self.report_warning('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
cookies = self._get_cookies(url)
if cookies and cookies.get('token'):
diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py
index 19b09121c..8234ba7df 100644
--- a/yt_dlp/extractor/viki.py
+++ b/yt_dlp/extractor/viki.py
@@ -99,14 +99,7 @@ class VikiBaseIE(InfoExtractor):
self.raise_login_required(message)
self._raise_error(message)
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
self._token = self._call_api(
'sessions.json', None, 'Logging in', fatal=False,
data={'username': username, 'password': password}).get('token')
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 458a751fe..051cf1b17 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -44,12 +44,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- if self._LOGIN_REQUIRED:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
- return
+ def _perform_login(self, username, password):
webpage = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
token, vuid = self._extract_xsrft_and_vuid(webpage)
@@ -75,6 +70,10 @@ class VimeoBaseInfoExtractor(InfoExtractor):
expected=True)
raise ExtractorError('Unable to log in')
+ def _real_initialize(self):
+ if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'):
+ self._raise_login_required()
+
def _get_video_password(self):
password = self.get_param('videopassword')
if password is None:
@@ -701,9 +700,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('Wrong video password', expected=True)
return checked
- def _real_initialize(self):
- self._login()
-
def _extract_from_api(self, video_id, unlisted_hash=None):
token = self._download_json(
'https://vimeo.com/_rv/jwt', video_id, headers={
@@ -1231,9 +1227,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'skip': 'video gone',
}]
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
page_url, video_id = self._match_valid_url(url).groups()
data = self._download_json(
@@ -1275,9 +1268,6 @@ class VimeoWatchLaterIE(VimeoChannelIE):
'only_matching': True,
}]
- def _real_initialize(self):
- self._login()
-
def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum)
request = sanitized_Request(url)
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py
index 18eb33b57..cbc315961 100644
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -29,11 +29,7 @@ from .youtube import YoutubeIE
class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk'
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
+ def _perform_login(self, username, password):
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
@@ -57,9 +53,6 @@ class VKBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login, incorrect username and/or password', expected=True)
- def _real_initialize(self):
- self._login()
-
def _download_payload(self, path, video_id, data, fatal=True):
data['al'] = 1
code, payload = self._download_json(
diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index 74dc349d5..ae35c976c 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@@ -26,22 +26,16 @@ class VLiveBaseIE(NaverBaseIE):
_NETRC_MACHINE = 'vlive'
_logged_in = False
- def _real_initialize(self):
- if not self._logged_in:
- VLiveBaseIE._logged_in = self._login()
-
- def _login(self):
- email, password = self._get_login_info()
- if email is None:
- return False
-
+ def _perform_login(self, username, password):
+ if self._logged_in:
+ return
LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
self._request_webpage(
LOGIN_URL, None, note='Downloading login cookies')
self._download_webpage(
LOGIN_URL, None, note='Logging in',
- data=urlencode_postdata({'email': email, 'pwd': password}),
+ data=urlencode_postdata({'email': username, 'pwd': password}),
headers={
'Referer': LOGIN_URL,
'Content-Type': 'application/x-www-form-urlencoded'
@@ -54,7 +48,7 @@ class VLiveBaseIE(NaverBaseIE):
if not try_get(login_info, lambda x: x['message']['login'], bool):
raise ExtractorError('Unable to log in', expected=True)
- return True
+ VLiveBaseIE._logged_in = True
def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
if note is None:
diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py
index 7bc55f333..10e6be7ed 100644
--- a/yt_dlp/extractor/vrv.py
+++ b/yt_dlp/extractor/vrv.py
@@ -85,7 +85,7 @@ class VRVBaseIE(InfoExtractor):
'resource_key': resource_key,
})['__links__']['cms_resource']['href']
- def _real_initialize(self):
+ def _initialize_pre_login(self):
webpage = self._download_webpage(
'https://vrv.co/', None, headers=self.geo_verification_headers())
self._API_PARAMS = self._parse_json(self._search_regex(
@@ -124,16 +124,10 @@ class VRVIE(VRVBaseIE):
}]
_NETRC_MACHINE = 'vrv'
- def _real_initialize(self):
- super(VRVIE, self)._real_initialize()
-
- email, password = self._get_login_info()
- if email is None:
- return
-
+ def _perform_login(self, username, password):
token_credentials = self._call_api(
'authenticate/by:credentials', None, 'Token Credentials', data={
- 'email': email,
+ 'email': username,
'password': password,
})
self._TOKEN = token_credentials['oauth_token']
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4fe9cec5b..d74d5b0e9 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -263,7 +263,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
- _NETRC_MACHINE = 'youtube'
+ # _NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@@ -334,21 +334,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
- def _login(self):
- """
- Attempt to log in to YouTube.
- If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
- """
-
- if (self._LOGIN_REQUIRED
- and self.get_param('cookiefile') is None
- and self.get_param('cookiesfrombrowser') is None):
- self.raise_login_required(
- 'Login details are needed to download this content', method='cookies')
- username, password = self._get_login_info()
- if username:
- self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
-
def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
@@ -379,7 +364,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _real_initialize(self):
self._initialize_pref()
self._initialize_consent()
- self._login()
+ if (self._LOGIN_REQUIRED
+ and self.get_param('cookiefile') is None
+ and self.get_param('cookiesfrombrowser') is None):
+ self.raise_login_required('Login details are needed to download this content', method='cookies')
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
@@ -458,7 +446,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key or self._extract_api_key()})
+ query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
data = self._search_regex(
@@ -1297,7 +1285,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'expected_warnings': [
'DASH manifest missing',
- 'Some formats are possibly damaged'
]
},
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
@@ -3013,7 +3000,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(last_error)
return prs, player_url
- def _extract_formats(self, streaming_data, video_id, player_url, is_live):
+ def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {}
q = qualities([
@@ -3024,7 +3011,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
- approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@@ -3091,7 +3077,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
- is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
+ # Make sure to avoid false positives with small duration differences.
+ # Eg: __2ABJjxzNo, ySuUZEjARPY
+ is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
if is_damaged:
self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
@@ -3227,14 +3215,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return webpage, master_ytcfg, player_responses, player_url
- def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
+ def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
is_live = get_first(video_details, 'isLive')
if is_live is None:
is_live = get_first(live_broadcast_details, 'isLiveNow')
streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
- formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
+ formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))
return live_broadcast_details, is_live, streaming_data, formats
@@ -3315,7 +3303,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self.playlist_result(
entries, video_id, video_title, video_description)
- live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
+ duration = int_or_none(
+ get_first(video_details, 'lengthSeconds')
+ or get_first(microformats, 'lengthSeconds')
+ or parse_duration(search_meta('duration'))) or None
+
+ live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
+ video_id, microformats, video_details, player_responses, player_url, duration)
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
@@ -3387,10 +3381,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
get_first(video_details, 'channelId')
or get_first(microformats, 'externalChannelId')
or search_meta('channelId'))
- duration = int_or_none(
- get_first(video_details, 'lengthSeconds')
- or get_first(microformats, 'lengthSeconds')
- or parse_duration(search_meta('duration'))) or None
owner_profile_url = get_first(microformats, 'ownerProfileUrl')
live_content = get_first(video_details, 'isLiveContent')
@@ -3926,6 +3916,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if entry:
yield entry
'''
+
def _extract_entries(self, parent_renderer, continuation_list):
# continuation_list is modified in-place with continuation_list = [continuation_token]
continuation_list[:] = [None]
@@ -4024,6 +4015,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue
known_renderers = {
+ 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
'gridPlaylistRenderer': (self._grid_entries, 'items'),
'gridVideoRenderer': (self._grid_entries, 'items'),
'gridChannelRenderer': (self._grid_entries, 'items'),
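
The reworked damaged-format heuristic compares each format's approxDurationMs against the video's own duration rather than against the longest format: with milliseconds divided by seconds, an intact format sits near a ratio of 1000, so anything under 500 is roughly shorter than half the video. The arithmetic, with illustrative numbers:

    duration = 212                 # lengthSeconds from the player response
    print(211_866 / duration)      # ~999.4 -> intact, not flagged
    print(60_000 / duration)       # ~283.0 -> < 500, deprioritized as damaged
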
diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
index 9435920b2..c02b4ca14 100644
--- a/yt_dlp/extractor/zattoo.py
+++ b/yt_dlp/extractor/zattoo.py
@@ -25,13 +25,11 @@ class ZattooPlatformBaseIE(InfoExtractor):
def _host_url(self):
return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
- def _login(self):
- username, password = self._get_login_info()
- if not username or not password:
- self.raise_login_required(
- 'A valid %s account is needed to access this media.'
- % self._NETRC_MACHINE)
+ def _real_initialize(self):
+ if not self._power_guide_hash:
+ self.raise_login_required('An account is needed to access this media', method='password')
+ def _perform_login(self, username, password):
try:
data = self._download_json(
'%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
@@ -52,7 +50,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
self._power_guide_hash = data['session']['power_guide_hash']
- def _real_initialize(self):
+ def _initialize_pre_login(self):
webpage = self._download_webpage(
self._host_url(), None, 'Downloading app token')
app_token = self._html_search_regex(
@@ -72,8 +70,6 @@ class ZattooPlatformBaseIE(InfoExtractor):
'format': 'json',
}))
- self._login()
-
def _extract_cid(self, video_id, channel_name):
channel_groups = self._download_json(
'%s/zapi/v2/cached/channels/%s' % (self._host_url(),
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index ebe393ec7..3e3f11b15 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -93,32 +93,27 @@ class Zee5IE(InfoExtractor):
_NETRC_MACHINE = 'zee5'
_GEO_COUNTRIES = ['IN']
- def _login(self):
- username, password = self._get_login_info()
- if username:
- if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
- self.report_login()
- otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username),
- None, note='Sending OTP')
- if otp_request_json['code'] == 0:
- self.to_screen(otp_request_json['message'])
- else:
- raise ExtractorError(otp_request_json['message'], expected=True)
- otp_code = self._get_tfa_info('OTP')
- otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID),
- None, note='Verifying OTP', fatal=False)
- if not otp_verify_json:
- raise ExtractorError('Unable to verify OTP.', expected=True)
- self._USER_TOKEN = otp_verify_json.get('token')
- if not self._USER_TOKEN:
- raise ExtractorError(otp_request_json['message'], expected=True)
- elif username.lower() == 'token' and len(password) > 1198:
- self._USER_TOKEN = password
+ def _perform_login(self, username, password):
+ if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
+ self.report_login()
+ otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username),
+ None, note='Sending OTP')
+ if otp_request_json['code'] == 0:
+ self.to_screen(otp_request_json['message'])
else:
- raise ExtractorError(self._LOGIN_HINT, expected=True)
-
- def _real_initialize(self):
- self._login()
+ raise ExtractorError(otp_request_json['message'], expected=True)
+ otp_code = self._get_tfa_info('OTP')
+ otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID),
+ None, note='Verifying OTP', fatal=False)
+ if not otp_verify_json:
+ raise ExtractorError('Unable to verify OTP.', expected=True)
+ self._USER_TOKEN = otp_verify_json.get('token')
+ if not self._USER_TOKEN:
+ raise ExtractorError(otp_request_json['message'], expected=True)
+ elif username.lower() == 'token' and len(password) > 1198:
+ self._USER_TOKEN = password
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index c9b57c2f0..da6f27801 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2279,8 +2279,9 @@ def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
num, factor = float_or_none(num), float(factor)
if num is None or num < 0:
return None
- exponent = 0 if num == 0 else int(math.log(num, factor))
- suffix = ['', *'kMGTPEZY'][exponent]
+ POSSIBLE_SUFFIXES = 'kMGTPEZY'
+ exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
+ suffix = ['', *POSSIBLE_SUFFIXES][exponent]
if factor == 1024:
suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
converted = num / (factor ** exponent)
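
The utils fix clamps the exponent so oversized inputs can no longer index past the suffix table: with factor 1000 the table tops out at 'Y' (10^24), and anything larger now reuses it instead of raising IndexError. A trimmed version showing the boundary case (the None/negative guard and the factor-1024 'Ki' handling from the original are omitted):

    import math

    def format_decimal_suffix(num, fmt='%d%s', factor=1000):
        POSSIBLE_SUFFIXES = 'kMGTPEZY'
        exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
        suffix = ['', *POSSIBLE_SUFFIXES][exponent]
        return fmt % (num / factor ** exponent, suffix)

    print(format_decimal_suffix(1_500_000))  # '1M'
    print(format_decimal_suffix(10 ** 30))   # '1000000Y' (IndexError before the fix)
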