Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--  yt_dlp/extractor/__init__.py | 37
-rw-r--r--  yt_dlp/extractor/_extractors.py | 2198
-rw-r--r--  yt_dlp/extractor/abematv.py | 17
-rw-r--r--  yt_dlp/extractor/adobepass.py | 27
-rw-r--r--  yt_dlp/extractor/animelab.py | 270
-rw-r--r--  yt_dlp/extractor/archiveorg.py | 45
-rw-r--r--  yt_dlp/extractor/arnes.py | 2
-rw-r--r--  yt_dlp/extractor/atscaleconf.py | 34
-rw-r--r--  yt_dlp/extractor/audius.py | 4
-rw-r--r--  yt_dlp/extractor/awaan.py | 2
-rw-r--r--  yt_dlp/extractor/bbc.py | 12
-rw-r--r--  yt_dlp/extractor/bellmedia.py | 10
-rw-r--r--  yt_dlp/extractor/bilibili.py | 21
-rw-r--r--  yt_dlp/extractor/bloomberg.py | 10
-rw-r--r--  yt_dlp/extractor/brightcove.py | 4
-rw-r--r--  yt_dlp/extractor/cbc.py | 4
-rw-r--r--  yt_dlp/extractor/ccc.py | 1
-rw-r--r--  yt_dlp/extractor/cda.py | 12
-rw-r--r--  yt_dlp/extractor/chingari.py | 8
-rw-r--r--  yt_dlp/extractor/common.py | 613
-rw-r--r--  yt_dlp/extractor/commonprotocols.py | 5
-rw-r--r--  yt_dlp/extractor/crunchyroll.py | 27
-rw-r--r--  yt_dlp/extractor/curiositystream.py | 15
-rw-r--r--  yt_dlp/extractor/cwtv.py | 1
-rw-r--r--  yt_dlp/extractor/dailymotion.py | 9
-rw-r--r--  yt_dlp/extractor/dailywire.py | 114
-rw-r--r--  yt_dlp/extractor/digitalconcerthall.py | 2
-rw-r--r--  yt_dlp/extractor/dropbox.py | 4
-rw-r--r--  yt_dlp/extractor/dropout.py | 33
-rw-r--r--  yt_dlp/extractor/duboku.py | 50
-rw-r--r--  yt_dlp/extractor/ertgr.py | 5
-rw-r--r--  yt_dlp/extractor/espn.py | 145
-rw-r--r--  yt_dlp/extractor/expressen.py | 7
-rw-r--r--  yt_dlp/extractor/extractors.py | 2190
-rw-r--r--  yt_dlp/extractor/facebook.py | 6
-rw-r--r--  yt_dlp/extractor/fc2.py | 18
-rw-r--r--  yt_dlp/extractor/flickr.py | 2
-rw-r--r--  yt_dlp/extractor/fourzerostudio.py | 107
-rw-r--r--  yt_dlp/extractor/foxgay.py | 2
-rw-r--r--  yt_dlp/extractor/foxnews.py | 9
-rw-r--r--  yt_dlp/extractor/franceculture.py | 125
-rw-r--r--  yt_dlp/extractor/freetv.py | 141
-rw-r--r--  yt_dlp/extractor/fuyintv.py | 30
-rw-r--r--  yt_dlp/extractor/generic.py | 229
-rw-r--r--  yt_dlp/extractor/giga.py | 9
-rw-r--r--  yt_dlp/extractor/googledrive.py | 58
-rw-r--r--  yt_dlp/extractor/hitbox.py | 6
-rw-r--r--  yt_dlp/extractor/ina.py | 84
-rw-r--r--  yt_dlp/extractor/instagram.py | 2
-rw-r--r--  yt_dlp/extractor/iqiyi.py | 6
-rw-r--r--  yt_dlp/extractor/iwara.py | 80
-rw-r--r--  yt_dlp/extractor/ixigua.py | 84
-rw-r--r--  yt_dlp/extractor/joj.py | 2
-rw-r--r--  yt_dlp/extractor/jwplatform.py | 5
-rw-r--r--  yt_dlp/extractor/kaltura.py | 2
-rw-r--r--  yt_dlp/extractor/keezmovies.py | 2
-rw-r--r--  yt_dlp/extractor/kicker.py | 55
-rw-r--r--  yt_dlp/extractor/kth.py | 28
-rw-r--r--  yt_dlp/extractor/kusi.py | 6
-rw-r--r--  yt_dlp/extractor/lastfm.py | 2
-rw-r--r--  yt_dlp/extractor/lbry.py | 7
-rw-r--r--  yt_dlp/extractor/line.py | 2
-rw-r--r--  yt_dlp/extractor/lnkgo.py | 2
-rw-r--r--  yt_dlp/extractor/medaltv.py | 2
-rw-r--r--  yt_dlp/extractor/mediaset.py | 12
-rw-r--r--  yt_dlp/extractor/metacafe.py | 13
-rw-r--r--  yt_dlp/extractor/minds.py | 2
-rw-r--r--  yt_dlp/extractor/mirrorcouk.py | 98
-rw-r--r--  yt_dlp/extractor/mixcloud.py | 3
-rw-r--r--  yt_dlp/extractor/naver.py | 139
-rw-r--r--  yt_dlp/extractor/ndr.py | 244
-rw-r--r--  yt_dlp/extractor/ndtv.py | 14
-rw-r--r--  yt_dlp/extractor/nebula.py | 9
-rw-r--r--  yt_dlp/extractor/neteasemusic.py | 18
-rw-r--r--  yt_dlp/extractor/netverse.py | 176
-rw-r--r--  yt_dlp/extractor/nhk.py | 20
-rw-r--r--  yt_dlp/extractor/niconico.py | 6
-rw-r--r--  yt_dlp/extractor/npr.py | 21
-rw-r--r--  yt_dlp/extractor/nrk.py | 7
-rw-r--r--  yt_dlp/extractor/openload.py | 15
-rw-r--r--  yt_dlp/extractor/peloton.py | 12
-rw-r--r--  yt_dlp/extractor/playsuisse.py | 147
-rw-r--r--  yt_dlp/extractor/playvid.py | 13
-rw-r--r--  yt_dlp/extractor/pokemon.py | 41
-rw-r--r--  yt_dlp/extractor/popcorntimes.py | 7
-rw-r--r--  yt_dlp/extractor/pornhub.py | 33
-rw-r--r--  yt_dlp/extractor/premiershiprugby.py | 39
-rw-r--r--  yt_dlp/extractor/puls4.py | 7
-rw-r--r--  yt_dlp/extractor/radiko.py | 4
-rw-r--r--  yt_dlp/extractor/radiofrance.py | 49
-rw-r--r--  yt_dlp/extractor/radlive.py | 2
-rw-r--r--  yt_dlp/extractor/rokfin.py | 10
-rw-r--r--  yt_dlp/extractor/rtve.py | 10
-rw-r--r--  yt_dlp/extractor/rumble.py | 30
-rw-r--r--  yt_dlp/extractor/screencast.py | 13
-rw-r--r--  yt_dlp/extractor/shared.py | 13
-rw-r--r--  yt_dlp/extractor/soundcloud.py | 4
-rw-r--r--  yt_dlp/extractor/southpark.py | 43
-rw-r--r--  yt_dlp/extractor/spotify.py | 39
-rw-r--r--  yt_dlp/extractor/storyfire.py | 2
-rw-r--r--  yt_dlp/extractor/streamcz.py | 4
-rw-r--r--  yt_dlp/extractor/stv.py | 2
-rw-r--r--  yt_dlp/extractor/substack.py | 100
-rw-r--r--  yt_dlp/extractor/tennistv.py | 179
-rw-r--r--  yt_dlp/extractor/testurl.py | 2
-rw-r--r--  yt_dlp/extractor/tiktok.py | 86
-rw-r--r--  yt_dlp/extractor/trovo.py | 2
-rw-r--r--  yt_dlp/extractor/tver.py | 37
-rw-r--r--  yt_dlp/extractor/twitter.py | 2
-rw-r--r--  yt_dlp/extractor/udemy.py | 14
-rw-r--r--  yt_dlp/extractor/urort.py | 11
-rw-r--r--  yt_dlp/extractor/vevo.py | 119
-rw-r--r--  yt_dlp/extractor/videa.py | 9
-rw-r--r--  yt_dlp/extractor/videocampus_sachsen.py | 27
-rw-r--r--  yt_dlp/extractor/vidio.py | 4
-rw-r--r--  yt_dlp/extractor/vidlii.py | 2
-rw-r--r--  yt_dlp/extractor/vimeo.py | 32
-rw-r--r--  yt_dlp/extractor/vine.py | 2
-rw-r--r--  yt_dlp/extractor/voicy.py | 6
-rw-r--r--  yt_dlp/extractor/vrv.py | 15
-rw-r--r--  yt_dlp/extractor/vshare.py | 8
-rw-r--r--  yt_dlp/extractor/wppilot.py | 6
-rw-r--r--  yt_dlp/extractor/xfileshare.py | 7
-rw-r--r--  yt_dlp/extractor/xhamster.py | 15
-rw-r--r--  yt_dlp/extractor/yahoo.py | 17
-rw-r--r--  yt_dlp/extractor/ynet.py | 6
-rw-r--r--  yt_dlp/extractor/younow.py | 2
-rw-r--r--  yt_dlp/extractor/youporn.py | 5
-rw-r--r--  yt_dlp/extractor/youtube.py | 437
-rw-r--r--  yt_dlp/extractor/zattoo.py | 2
-rw-r--r--  yt_dlp/extractor/zdf.py | 18
-rw-r--r--  yt_dlp/extractor/zhihu.py | 2
132 files changed, 5596 insertions(+), 3950 deletions(-)
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index afd3d05ac..6bfa4bd7b 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
-import os
+from ..compat.compat_utils import passthrough_module
-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
- with contextlib.suppress(ImportError):
- from .lazy_extractors import * # noqa: F403
- from .lazy_extractors import _ALL_CLASSES
- _LAZY_LOADER = True
-
-if not _LAZY_LOADER:
- from .extractors import * # noqa: F403
- _ALL_CLASSES = [ # noqa: F811
- klass
- for name, klass in globals().items()
- if name.endswith('IE') and name != 'GenericIE'
- ]
- _ALL_CLASSES.append(GenericIE) # noqa: F405
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
def gen_extractor_classes():
""" Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL.
"""
+ from .extractors import _ALL_CLASSES
+
return _ALL_CLASSES
@@ -39,10 +22,12 @@ def gen_extractors():
def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+ from .generic import GenericIE
+
yield from sorted(filter(
- lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
+ lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
- yield GenericIE # noqa: F405
+ yield GenericIE
def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
- return globals()[ie_name + 'IE']
+ from . import extractors
+
+ return getattr(extractors, f'{ie_name}IE')
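
The rewritten __init__.py replaces eager imports with passthrough_module, deferring the import of the extractor module (over 2000 classes) until an attribute is actually requested. Below is a minimal sketch of how such a passthrough can work via a module-level __getattr__ (PEP 562); the real helper lives in yt_dlp/compat/compat_utils.py and its exact implementation may differ.

    import importlib

    def passthrough_module(parent, child):
        # Route failed attribute lookups on the `parent` package to
        # `parent.child`, importing the child module lazily on first access.
        parent_module = importlib.import_module(parent)

        def __getattr__(name):  # PEP 562 module-level __getattr__
            child_module = importlib.import_module(child, parent)
            return getattr(child_module, name)

        parent_module.__getattr__ = __getattr__

With this in place, "from yt_dlp.extractor import YoutubeIE" keeps working, but merely importing yt_dlp.extractor no longer pays the cost of loading every extractor.
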
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
new file mode 100644
index 000000000..37328dfc8
--- /dev/null
+++ b/yt_dlp/extractor/_extractors.py
@@ -0,0 +1,2198 @@
+# flake8: noqa: F401
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+ ABCIViewShowSeriesIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .abematv import (
+ AbemaTVIE,
+ AbemaTVTitleIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ AENetworksCollectionIE,
+ AENetworksShowIE,
+ HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
+)
+from .afreecatv import (
+ AfreecaTVIE,
+ AfreecaTVLiveIE,
+ AfreecaTVUserIE,
+)
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .amara import AmaraIE
+from .alura import (
+ AluraIE,
+ AluraCourseIE
+)
+from .amcnetworks import AMCNetworksIE
+from .amazon import AmazonStoreIE
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
+from .animeondemand import AnimeOnDemandIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .alsace20tv import (
+ Alsace20TVIE,
+ Alsace20TVEmbedIE,
+)
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .applepodcasts import ApplePodcastsIE
+from .archiveorg import (
+ ArchiveOrgIE,
+ YoutubeWebArchiveIE,
+)
+from .arcpublishing import ArcPublishingIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDIE,
+ ARDMediathekIE,
+)
+from .arte import (
+ ArteTVIE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+ ArteTVCategoryIE,
+)
+from .arnes import ArnesIE
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atscaleconf import AtScaleConfEventIE
+from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audius import (
+ AudiusIE,
+ AudiusTrackIE,
+ AudiusPlaylistIE,
+ AudiusProfileIE,
+)
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .banbye import (
+ BanByeIE,
+ BanByeChannelIE,
+)
+from .bandaichannel import BandaiChannelIE
+from .bandcamp import (
+ BandcampIE,
+ BandcampAlbumIE,
+ BandcampWeeklyIE,
+ BandcampUserIE,
+)
+from .bannedvideo import BannedVideoIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerEpisodesIE,
+ BBCCoUkIPlayerGroupIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bfmtv import (
+ BFMTVIE,
+ BFMTVLiveIE,
+ BFMTVArticleIE,
+)
+from .bibeltv import BibelTVIE
+from .bigflix import BigflixIE
+from .bigo import BigoIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliSearchIE,
+ BilibiliCategoryIE,
+ BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+ BilibiliChannelIE,
+ BiliIntlIE,
+ BiliIntlSeriesIE,
+ BiliLiveIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .bitwave import (
+ BitwaveReplayIE,
+ BitwaveStreamIE,
+)
+from .biqle import BIQLEIE
+from .blackboardcollaborate import BlackboardCollaborateIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .blogger import BloggerIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
+from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
+from .bpb import BpbIE
+from .br import (
+ BRIE,
+ BRMediathekIE,
+)
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .breitbart import BreitBartIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .cableav import CableAVIE
+from .callin import CallinIE
+from .caltrans import CaltransIE
+from .cam4 import CAM4IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .cammodels import CamModelsIE
+from .camwithher import CamWithHerIE
+from .canalalpha import CanalAlphaIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+ VrtNUIE,
+ DagelijkseKostIE,
+)
+from .carambatv import (
+ CarambaTVIE,
+ CarambaTVPageIE,
+)
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCGemIE,
+ CBCGemPlaylistIE,
+ CBCGemLiveIE,
+)
+from .cbs import CBSIE
+from .cbslocal import (
+ CBSLocalIE,
+ CBSLocalArticleIE,
+)
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .ceskatelevize import CeskaTelevizeIE
+from .cgtn import CGTNIE
+from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chingari import (
+ ChingariIE,
+ ChingariUserIE,
+)
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .ciscowebex import CiscoWebexIE
+from .cjsw import CJSWIE
+from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralIE,
+ ComedyCentralTVIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+ ViewSourceIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cpac import (
+ CPACIE,
+ CPACPlaylistIE,
+)
+from .cozytv import CozyTVIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .craftsy import CraftsyIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crowdbunker import (
+ CrowdBunkerIE,
+ CrowdBunkerChannelIE,
+)
+from .crunchyroll import (
+ CrunchyrollIE,
+ CrunchyrollShowPlaylistIE,
+ CrunchyrollBetaIE,
+ CrunchyrollBetaShowIE,
+)
+from .cspan import CSpanIE, CSpanCongressIE
+from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionsIE,
+ CuriosityStreamSeriesIE,
+)
+from .cwtv import CWTVIE
+from .cybrary import (
+ CybraryIE,
+ CybraryCourseIE
+)
+from .daftsex import DaftsexIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
+from .dailywire import (
+ DailyWireIE,
+ DailyWirePodcastIE,
+)
+from .damtomo import (
+ DamtomoRecordIE,
+ DamtomoVideoIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .daystar import DaystarClipIE
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import (
+ DeezerPlaylistIE,
+ DeezerAlbumIE,
+)
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digg import DiggIE
+from .dotsub import DotsubIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import (
+ DPlayIE,
+ DiscoveryPlusIE,
+ HGTVDeIE,
+ GoDiscoveryIE,
+ TravelChannelIE,
+ CookingChannelIE,
+ HGTVUsaIE,
+ FoodNetworkIE,
+ InvestigationDiscoveryIE,
+ DestinationAmericaIE,
+ AmHistoryChannelIE,
+ ScienceChannelIE,
+ DIYNetworkIE,
+ DiscoveryLifeIE,
+ AnimalPlanetIE,
+ TLCIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyIE,
+ DiscoveryPlusItalyShowIE,
+ DiscoveryPlusIndiaShowIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .duboku import (
+ DubokuIE,
+ DubokuPlaylistIE
+)
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .digitalconcerthall import DigitalConcertHallIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .doodstream import DoodStreamIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutSeasonIE,
+ DropoutIE
+)
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentube import (
+ EllenTubeIE,
+ EllenTubeVideoIE,
+ EllenTubePlaylistIE,
+)
+from .elonet import ElonetIE
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .epicon import (
+ EpiconIE,
+ EpiconSeriesIE,
+)
+from .eporner import EpornerIE
+from .eroprofile import (
+ EroProfileIE,
+ EroProfileAlbumIE,
+)
+from .ertgr import (
+ ERTFlixCodenameIE,
+ ERTFlixIE,
+ ERTWebtvEmbedIE,
+)
+from .escapist import EscapistIE
+from .espn import (
+ ESPNIE,
+ WatchESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+ ESPNCricInfoIE,
+)
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .europeantour import EuropeanTourIE
+from .euscreen import EUScreenIE
+from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+ FacebookRedirectURLIE,
+)
+from .fancode import (
+ FancodeVodIE,
+ FancodeLiveIE
+)
+
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+ FC2LiveIE,
+)
+from .fczenit import FczenitIE
+from .fifa import FifaIE
+from .filmmodu import FilmmoduIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivetv import FiveTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fourzerostudio import (
+ FourZeroStudioArchiveIE,
+ FourZeroStudioClipIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxgay import FoxgayIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+)
+from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVInfoIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .freetv import (
+ FreeTvIE,
+ FreeTvMoviesIE,
+)
+from .fujitv import FujiTVFODPlus7IE
+from .funimation import (
+ FunimationIE,
+ FunimationPageIE,
+ FunimationShowIE,
+)
+from .funk import FunkIE
+from .fusion import FusionIE
+from .fuyintv import FuyinTVIE
+from .gab import (
+ GabTVIE,
+ GabIE,
+)
+from .gaia import GaiaIE
+from .gameinformer import GameInformerIE
+from .gamejolt import (
+ GameJoltIE,
+ GameJoltUserIE,
+ GameJoltGameIE,
+ GameJoltGameSoundtrackIE,
+ GameJoltCommunityIE,
+ GameJoltSearchIE,
+)
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
+from .generic import GenericIE
+from .gettr import (
+ GettrIE,
+ GettrStreamingIE,
+)
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .go import GoIE
+from .godtube import GodTubeIE
+from .gofile import GofileIE
+from .golem import GolemIE
+from .goodgame import GoodGameIE
+from .googledrive import (
+ GoogleDriveIE,
+ GoogleDriveFolderIE,
+)
+from .googlepodcasts import (
+ GooglePodcastsIE,
+ GooglePodcastsFeedIE,
+)
+from .googlesearch import GoogleSearchIE
+from .gopro import GoProIE
+from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
+from .gputechconf import GPUTechConfIE
+from .gronkh import (
+ GronkhIE,
+ GronkhFeedIE,
+ GronkhVodsIE
+)
+from .groupon import GrouponIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPrefixIE,
+ HotStarPlaylistIE,
+ HotStarSeriesIE,
+)
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .hse import (
+ HSEShowIE,
+ HSEProductIE,
+)
+from .huajiao import HuajiaoIE
+from .huya import HuyaLiveIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+ HungamaAlbumPlaylistIE,
+)
+from .hypem import HypemIE
+from .icareus import IcareusIE
+from .ichinanalive import (
+ IchinanaLiveIE,
+ IchinanaLiveClipIE,
+)
+from .ign import (
+ IGNIE,
+ IGNVideoIE,
+ IGNArticleIE,
+)
+from .iheart import (
+ IHeartRadioIE,
+ IHeartRadioPodcastIE,
+)
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramIOSIE,
+ InstagramUserIE,
+ InstagramTagIE,
+ InstagramStoryIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import (
+ IPrimaIE,
+ IPrimaCNNIE
+)
+from .iqiyi import (
+ IqiyiIE,
+ IqIE,
+ IqAlbumIE
+)
+from .itprotv import (
+ ITProTVIE,
+ ITProTVCourseIE
+)
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import (
+ IwaraIE,
+ IwaraPlaylistIE,
+ IwaraUserIE,
+)
+from .ixigua import IxiguaIE
+from .izlesene import IzleseneIE
+from .jable import (
+ JableIE,
+ JablePlaylistIE,
+)
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .joj import JojIE
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .kelbyone import KelbyOneIE
+from .ketnet import KetnetIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
+from .kicker import KickerIE
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .konserthusetplay import KonserthusetPlayIE
+from .koo import KooIE
+from .kth import KTHIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import (
+ LA7IE,
+ LA7PodcastEpisodeIE,
+ LA7PodcastIE,
+)
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+ EHFTVIE,
+ ITTFIE,
+)
+from .lastfm import (
+ LastFMIE,
+ LastFMPlaylistIE,
+ LastFMUserIE,
+)
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .likee import (
+ LikeeIE,
+ LikeeUserIE
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .line import (
+ LineLiveIE,
+ LineLiveChannelIE,
+)
+from .linkedin import (
+ LinkedInIE,
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .lnkgo import (
+ LnkGoIE,
+ LnkIE,
+)
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import (
+ LRTVODIE,
+ LRTStreamIE
+)
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .m6 import M6IE
+from .magentamusik360 import MagentaMusik360IE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .mainstreaming import MainStreamingIE
+from .malltv import MallTVIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manoto import (
+ ManotoTVIE,
+ ManotoTVShowIE,
+ ManotoTVLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
+from .mediaklikk import MediaKlikkIE
+from .mediaset import (
+ MediasetIE,
+ MediasetShowIE,
+)
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .meta import METAIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
+from .microsoftstream import MicrosoftStreamIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .mildom import (
+ MildomIE,
+ MildomVodIE,
+ MildomClipIE,
+ MildomUserVodIE,
+)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mirrativ import (
+ MirrativIE,
+ MirrativUserIE,
+)
+from .mirrorcouk import MirrorCoUKIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixch import (
+ MixchIE,
+ MixchArchiveIE,
+)
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import (
+ MLBIE,
+ MLBVideoIE,
+)
+from .mlssoccer import MLSSoccerIE
+from .mnet import MnetIE
+from .moevideo import MoeVideoIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
+from .mojvideo import MojvideoIE
+from .morningstar import MorningstarIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviepilot import MoviepilotIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
+)
+from .muenchentv import MuenchenTVIE
+from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .musescore import MuseScoreIE
+from .musicdex import (
+ MusicdexSongIE,
+ MusicdexAlbumIE,
+ MusicdexArtistIE,
+ MusicdexPlaylistIE,
+)
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mxplayer import (
+ MxplayerIE,
+ MxplayerShowIE,
+)
+from .mychannels import MyChannelsIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
+from .myvideoge import MyVideoGeIE
+from .myvidster import MyVidsterIE
+from .n1 import (
+ N1InfoAssetIE,
+ N1InfoIIE,
+)
+from .nate import (
+ NateIE,
+ NateProgramIE,
+)
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import (
+ NaverIE,
+ NaverLiveIE,
+ NaverNowIE,
+)
+from .nba import (
+ NBAWatchEmbedIE,
+ NBAWatchIE,
+ NBAWatchCollectionIE,
+ NBAEmbedIE,
+ NBAIE,
+ NBAChannelIE,
+)
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .nebula import (
+ NebulaIE,
+ NebulaSubscriptionsIE,
+ NebulaChannelIE,
+)
+from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .netverse import (
+ NetverseIE,
+ NetversePlaylistIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+ NewgroundsUserIE,
+)
+from .newstube import NewstubeIE
+from .newsy import NewsyIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfb import NFBIE
+from .nfhsnetwork import NFHSNetworkIE
+from .nfl import (
+ NFLIE,
+ NFLArticleIE,
+)
+from .nhk import (
+ NhkVodIE,
+ NhkVodProgramIE,
+ NhkForSchoolBangumiIE,
+ NhkForSchoolSubjectIE,
+ NhkForSchoolProgramListIE,
+)
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickNightIE,
+ NickRuIE,
+)
+from .niconico import (
+ NiconicoIE,
+ NiconicoPlaylistIE,
+ NiconicoUserIE,
+ NiconicoSeriesIE,
+ NiconicoHistoryIE,
+ NicovideoSearchDateIE,
+ NicovideoSearchIE,
+ NicovideoSearchURLIE,
+ NicovideoTagURLIE,
+)
+from .ninecninemedia import (
+ NineCNineMediaIE,
+ CPTwentyFourIE,
+)
+from .ninegag import NineGagIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .nitter import NitterIE
+from .njpwworld import NJPWWorldIE
+from .nobelprize import NobelPrizeIE
+from .nonktube import NonkTubeIE
+from .noodlemagazine import NoodleMagazineIE
+from .noovo import NoovoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .novaplay import NovaPlayIE
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKRadioPodkastIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+)
+from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
+from .nzz import NZZIE
+from .odatv import OdaTVIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
+from .on24 import On24IE
+from .ondemandkorea import OnDemandKoreaIE
+from .onefootball import OneFootballIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+ OoyalaIE,
+ OoyalaExternalIE,
+)
+from .opencast import (
+ OpencastIE,
+ OpencastPlaylistIE,
+)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+ OpenRecMovieIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4IE,
+ ORFFM4StoryIE,
+ ORFOE1IE,
+ ORFOE3IE,
+ ORFNOEIE,
+ ORFWIEIE,
+ ORFBGLIE,
+ ORFOOEIE,
+ ORFSTMIE,
+ ORFKTNIE,
+ ORFSBGIE,
+ ORFTIRIE,
+ ORFVBGIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
+from .pandoratv import PandoraTVIE
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
+from .paramountplus import (
+ ParamountPlusIE,
+ ParamountPlusSeriesIE,
+)
+from .parliamentliveuk import ParliamentLiveUKIE
+from .parlview import ParlviewIE
+from .patreon import (
+ PatreonIE,
+ PatreonUserIE
+)
+from .pbs import PBSIE
+from .pearvideo import PearVideoIE
+from .peekvids import PeekVidsIE, PlayVidsIE
+from .peertube import (
+ PeerTubeIE,
+ PeerTubePlaylistIE,
+)
+from .peertv import PeerTVIE
+from .peloton import (
+ PelotonIE,
+ PelotonLiveIE
+)
+from .people import PeopleIE
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pinterest import (
+ PinterestIE,
+ PinterestCollectionIE,
+)
+from .pixivsketch import (
+ PixivSketchIE,
+ PixivSketchUserIE,
+)
+from .pladform import PladformIE
+from .planetmarathi import PlanetMarathiIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
+from .plays import PlaysTVIE
+from .playstuff import PlayStuffIE
+from .playsuisse import PlaySuisseIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podchaser import PodchaserIE
+from .podomatic import PodomaticIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
+from .pokergo import (
+ PokerGoIE,
+ PokerGoCollectionIE,
+)
+from .polsatgo import PolsatGoIE
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioCategoryIE,
+ PolskieRadioPlayerIE,
+ PolskieRadioPodcastIE,
+ PolskieRadioPodcastListIE,
+ PolskieRadioRadioKierowcowIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .porncom import PornComIE
+from .pornflip import PornFlipIE
+from .pornhd import PornHdIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPlaylistIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .pornez import PornezIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .premiershiprugby import PremiershipRugbyIE
+from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .prx import (
+ PRXStoryIE,
+ PRXSeriesIE,
+ PRXAccountIE,
+ PRXStoriesSearchIE,
+ PRXSeriesSearchIE
+)
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiko import RadikoIE, RadikoRadioIE
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiozet import RadioZetPodcastIE
+from .radiokapital import (
+ RadioKapitalIE,
+ RadioKapitalShowIE,
+)
+from .radlive import (
+ RadLiveIE,
+ RadLiveChannelIE,
+ RadLiveSeasonIE,
+)
+from .rai import (
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiPlaySoundIE,
+ RaiPlaySoundLiveIE,
+ RaiPlaySoundPlaylistIE,
+ RaiIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rcs import (
+ RCSIE,
+ RCSEmbedsIE,
+ RCSVariousIE,
+)
+from .rcti import (
+ RCTIPlusIE,
+ RCTIPlusSeriesIE,
+ RCTIPlusTVIE,
+)
+from .rds import RDSIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullEmbedIE,
+ RedBullTVRrnContentIE,
+ RedBullIE,
+)
+from .reddit import RedditIE
+from .redgifs import (
+ RedGifsIE,
+ RedGifsSearchIE,
+ RedGifsUserIE,
+)
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rice import RICEIE
+from .rmcdecouverte import RMCDecouverteIE
+from .rockstargames import RockstarGamesIE
+from .rokfin import (
+ RokfinIE,
+ RokfinStackIE,
+ RokfinChannelIE,
+ RokfinSearchIE,
+)
+from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
+from .rottentomatoes import RottenTomatoesIE
+from .rozhlas import RozhlasIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import (
+ RTL2IE,
+ RTL2YouIE,
+ RTL2YouSeriesIE,
+)
+from .rtnews import (
+ RTNewsIE,
+ RTDocumentryIE,
+ RTDocumentryPlaylistIE,
+ RuptlyIE,
+)
+from .rtp import RTPIE
+from .rtrfm import RTRFMIE
+from .rts import RTSIE
+from .rtve import (
+ RTVEALaCartaIE,
+ RTVEAudioIE,
+ RTVELiveIE,
+ RTVEInfantilIE,
+ RTVETelevisionIE,
+)
+from .rtvnh import RTVNHIE
+from .rtvs import RTVSIE
+from .ruhd import RUHDIE
+from .rule34video import Rule34VideoIE
+from .rumble import (
+ RumbleEmbedIE,
+ RumbleChannelIE,
+)
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+ RutubeTagsIE,
+)
+from .glomex import (
+ GlomexIE,
+ GlomexEmbedIE,
+)
+from .megatvcom import (
+ MegaTVComIE,
+ MegaTVComEmbedIE,
+)
+from .ant1newsgr import (
+ Ant1NewsGrWatchIE,
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import (
+ RuvIE,
+ RuvSpilaIE
+)
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .saitosan import SaitosanIE
+from .samplefocus import SampleFocusIE
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .seeker import SeekerIE
+from .senategov import SenateISVPIE, SenateGovIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
+from .shemaroome import ShemarooMeIE
+from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+ SimplecastIE,
+ SimplecastEpisodeIE,
+ SimplecastPodcastIE,
+)
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skeb import SkebIE
+from .skyit import (
+ SkyItPlayerIE,
+ SkyItVideoIE,
+ SkyItVideoLiveIE,
+ SkyItIE,
+ SkyItAcademyIE,
+ SkyItArteIE,
+ CieloTVItIE,
+ TV8ItIE,
+)
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .skynewsau import SkyNewsAUIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .sonyliv import (
+ SonyLIVIE,
+ SonyLIVSeriesIE,
+)
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudRelatedIE,
+ SoundcloudUserIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkLatIE,
+ SouthParkNlIE
+)
+from .sovietscloset import (
+ SovietsClosetIE,
+ SovietsClosetPlaylistIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
+)
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .startv import StarTVIE
+from .steam import SteamIE
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .streamable import StreamableIE
+from .streamanity import StreamanityIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streamff import StreamFFIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stripchat import StripchatIE
+from .stv import STVPlayerIE
+from .substack import SubstackIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .tass import TassIE
+from .tbs import TBSIE
+from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
+from .techtalks import TechTalksIE
+from .ted import (
+ TedEmbedIE,
+ TedPlaylistIE,
+ TedSeriesIE,
+ TedTalkIE,
+)
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
+from .telemb import TeleMBIE
+from .telemundo import TelemundoIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theta import (
+ ThetaVideoIE,
+ ThetaStreamIE,
+)
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
+from .threespeak import (
+ ThreeSpeakIE,
+ ThreeSpeakUserIE,
+)
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+ TikTokSoundIE,
+ TikTokEffectIE,
+ TikTokTagIE,
+ TikTokVMIE,
+ DouyinIE,
+)
+from .tinypic import TinyPicIE
+from .tmz import TMZIE
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import (
+ ToggleIE,
+ MeWatchIE,
+)
+from .toggo import (
+ ToggoIE,
+)
+from .tokentube import (
+ TokentubeIE,
+ TokentubeChannelIE
+)
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+ TrovoChannelVodIE,
+ TrovoChannelClipIE,
+)
+from .trueid import TrueIDIE
+from .trunews import TruNewsIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubitv import (
+ TubiTvIE,
+ TubiTvShowIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
+from .tunepk import TunePkIE
+from .turbo import TurboIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+ MTVUutisetArticleIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import (
+ TV2HuIE,
+ TV2HuSeriesIE,
+)
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tv5unis import (
+ TV5UnisVideoIE,
+ TV5UnisIE,
+)
+from .tva import (
+ TVAIE,
+ QubIE,
+)
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tver import TVerIE
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
+from .tvnoe import TVNoeIE
+from .tvnow import (
+ TVNowIE,
+ TVNowFilmIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
+ TVNowShowIE,
+)
+from .tvopengr import (
+ TVOpenGrWatchIE,
+ TVOpenGrEmbedIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPStreamIE,
+ TVPWebsiteIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ ViafreeIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import (
+ TwitCastingIE,
+ TwitCastingLiveIE,
+ TwitCastingUserIE,
+)
+from .twitch import (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+ TwitterShortenerIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .ukcolumn import UkColumnIE
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .drooble import DroobleIE
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .utreon import UtreonIE
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veo import VeoIE
+from .veoh import VeohIE
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .vidbit import VidbitIE
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videocampus_sachsen import VideocampusSachsenIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import (
+ VidioIE,
+ VidioPremierIE,
+ VidioLiveIE
+)
+from .vidlii import VidLiiIE
+from .vier import VierIE, VierVideosIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimm import (
+ VimmIE,
+ VimmRecordingIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+)
+from .vlive import (
+ VLiveIE,
+ VLivePostIE,
+ VLiveChannelIE,
+)
+from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicerepublic import VoiceRepublicIE
+from .voicy import (
+ VoicyIE,
+ VoicyChannelIE,
+)
+from .voot import (
+ VootIE,
+ VootSeriesIE,
+)
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import VRTIE
+from .vrak import VrakIE
+from .vrv import (
+ VRVIE,
+ VRVSeriesIE,
+)
+from .vshare import VShareIE
+from .vtm import VTMIE
+from .medialaan import MedialaanIE
+from .vuclip import VuClipIE
+from .vupload import VuploadIE
+from .vvvvid import (
+ VVVVIDIE,
+ VVVVIDShowIE,
+)
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
+from .wakanim import WakanimIE
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wasdtv import (
+ WASDTVStreamIE,
+ WASDTVRecordIE,
+ WASDTVClipIE,
+)
+from .wat import WatIE
+from .watchbox import WatchBoxIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboMobileIE
+)
+from .weiqitv import WeiqiTVIE
+from .willow import WillowIE
+from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
+from .wistia import (
+ WistiaIE,
+ WistiaPlaylistIE,
+)
+from .worldstarhiphop import WorldStarHipHopIE
+from .wppilot import (
+ WPPilotIE,
+ WPPilotChannelsIE,
+)
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .xiami import (
+ XiamiSongIE,
+ XiamiAlbumIE,
+ XiamiArtistIE,
+ XiamiCollectionIE
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xinpianchang import XinpianchangIE
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicArtistAlbumsIE,
+)
+from .yandexvideo import (
+ YandexVideoIE,
+ YandexVideoPreviewIE,
+ ZenYandexIE,
+ ZenYandexChannelIE,
+)
+from .yapfiles import YapFilesIE
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+ YoutubeIE,
+ YoutubeClipIE,
+ YoutubeFavouritesIE,
+ YoutubeNotificationsIE,
+ YoutubeHistoryIE,
+ YoutubeTabIE,
+ YoutubeLivestreamEmbedIE,
+ YoutubePlaylistIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeMusicSearchURLIE,
+ YoutubeSubscriptionsIE,
+ YoutubeStoriesIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeYtBeIE,
+ YoutubeYtUserIE,
+ YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zattoo import (
+ BBVTVIE,
+ EinsUndEinsTVIE,
+ EWETVIE,
+ GlattvisionTVIE,
+ MNetTVIE,
+ NetPlusIE,
+ OsnatelTVIE,
+ QuantumTVIE,
+ SaltTVIE,
+ SAKTVIE,
+ VTXTVIE,
+ WalyTVIE,
+ ZattooIE,
+ ZattooLiveIE,
+ ZattooMoviesIE,
+ ZattooRecordingsIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+ Zee5IE,
+ Zee5SeriesIE,
+)
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+ ZingMp3IE,
+ ZingMp3AlbumIE,
+ ZingMp3ChartHomeIE,
+ ZingMp3WeekChartIE,
+ ZingMp3ChartMusicVideoIE,
+ ZingMp3UserIE,
+)
+from .zoom import ZoomIE
+from .zype import ZypeIE
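
The new _extractors.py is import-only (hence the file-wide "noqa: F401"): it exists purely to name every extractor class in matching order. The diffstat shows extractors.py rewritten in the same commit; judging from the logic removed from __init__.py above, it can rebuild _ALL_CLASSES from these names roughly as in the following sketch (not the exact file contents):

    # inside yt_dlp/extractor/extractors.py
    from ._extractors import *  # noqa: F401, F403
    from ._extractors import GenericIE

    _ALL_CLASSES = [
        klass for name, klass in globals().items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    # GenericIE goes last: the first matching extractor handles a URL,
    # so the catch-all must only be tried after every specific one.
    _ALL_CLASSES.append(GenericIE)
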
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 1b9deeae8..a75efdd0f 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -7,16 +7,17 @@ import json
import re
import struct
import time
+import urllib.parse
+import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
-from ..compat import compat_urllib_parse_urlparse, compat_urllib_request
from ..utils import (
ExtractorError,
bytes_to_intlist,
- decode_base,
+ decode_base_n,
int_or_none,
intlist_to_bytes,
request_to_url,
@@ -33,7 +34,7 @@ def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
ydl._opener.add_handler(handler)
@@ -46,7 +47,7 @@ def remove_opener(ydl, handler):
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
@@ -96,7 +97,7 @@ def remove_opener(ydl, handler):
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
-class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
+class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
@@ -109,7 +110,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
self.ie = ie
def _get_videokey_from_ticket(self, ticket):
- to_show = self.ie._downloader.params.get('verbose', False)
+ to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
license_response = self.ie._download_json(
@@ -123,7 +124,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
'Content-Type': 'application/json',
})
- res = decode_base(license_response['k'], self.STRTABLE)
+ res = decode_base_n(license_response['k'], table=self.STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
@@ -136,7 +137,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
def abematv_license_open(self, url):
url = request_to_url(url)
- ticket = compat_urllib_parse_urlparse(url).netloc
+ ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
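
Besides the compat cleanup (urllib.parse and urllib.request used directly), the abematv change renames decode_base to decode_base_n with an explicit table argument. Conceptually, the license key string is read as a big-endian integer in base len(table); the sketch below is consistent with the call in the hunk, though the actual helper in yt_dlp/utils.py may have a different signature.

    def decode_base_n(string, table):
        # Interpret `string` as the digits of a base-len(table) number,
        # most significant digit first.
        base = len(table)
        result = 0
        for char in string:
            result = result * base + table.index(char)
        return result

The 58-character STRTABLE makes this a base-58 decode, and the resulting integer is then split into two 64-bit words with struct.pack('>QQ', ...) as shown above.
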
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index a8e6c4363..a2666c2b8 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1,3 +1,4 @@
+import getpass
import json
import re
import time
@@ -5,19 +6,15 @@ import urllib.error
import xml.etree.ElementTree as etree
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_getpass
-)
+from ..compat import compat_urlparse
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
unescapeHTML,
- urlencode_postdata,
unified_timestamp,
- ExtractorError,
- NO_DEFAULT,
+ urlencode_postdata,
)
-
MSO_INFO = {
'DTV': {
'name': 'DIRECTV',
@@ -1431,7 +1428,7 @@ class AdobePassIE(InfoExtractor):
guid = xml_text(resource, 'guid') if '<' in resource else resource
count = 0
while count < 2:
- requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
authn_token = None
@@ -1506,7 +1503,7 @@ class AdobePassIE(InfoExtractor):
'send_confirm_link': False,
'send_token': True
}))
- philo_code = compat_getpass('Type auth code you have received [Return]: ')
+ philo_code = getpass.getpass('Type auth code you have received [Return]: ')
self._download_webpage(
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
'token': philo_code
@@ -1726,12 +1723,12 @@ class AdobePassIE(InfoExtractor):
raise_mvpd_required()
raise
if '<pendingLogout' in session:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@@ -1747,14 +1744,14 @@ class AdobePassIE(InfoExtractor):
'userMeta': '1',
}), headers=mvpd_headers)
if '<pendingLogout' in authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@@ -1770,7 +1767,7 @@ class AdobePassIE(InfoExtractor):
'hashed_guid': 'false',
}), headers=mvpd_headers)
if '<pendingLogout' in short_authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
return short_authorize
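The repeated `self._downloader.cache` → `self.cache` substitutions in this file rely on a convenience property added to InfoExtractor in the common.py hunk further down; it is a simple delegation:

    # Sketch of the delegating property (the real addition appears in the
    # common.py diff below); `_downloader` is the YoutubeDL instance.
    @property
    def cache(self):
        return self._downloader.cache

    # so extractors can write:
    #   self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
    # instead of reaching through self._downloader.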
diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py
deleted file mode 100644
index fe2b70aed..000000000
--- a/yt_dlp/extractor/animelab.py
+++ /dev/null
@@ -1,270 +0,0 @@
-from .common import InfoExtractor
-
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
- int_or_none,
- str_or_none,
- determine_ext,
-)
-
-from ..compat import compat_HTTPError
-
-
-class AnimeLabBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://www.animelab.com/login'
- _NETRC_MACHINE = 'animelab'
- _LOGGED_IN = False
-
- def _is_logged_in(self, login_page=None):
- if not self._LOGGED_IN:
- if not login_page:
- login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
- AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
- return self._LOGGED_IN
-
- def _perform_login(self, username, password):
- if self._is_logged_in():
- return
-
- login_form = {
- 'email': username,
- 'password': password,
- }
-
- try:
- response = self._download_webpage(
- self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata(login_form),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
- raise
-
- if not self._is_logged_in(response):
- raise ExtractorError('Unable to login (cannot verify if logged in)')
-
- def _real_initialize(self):
- if not self._is_logged_in():
- self.raise_login_required('Login is required to access any AnimeLab content')
-
-
-class AnimeLabIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
- 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
- 'info_dict': {
- 'id': '383',
- 'ext': 'mp4',
- 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
- 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
- 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
- 'series': 'Fullmetal Alchemist: Brotherhood',
- 'episode': 'Signs of a Counteroffensive',
- 'episode_number': 42,
- 'duration': 1469,
- 'season': 'Season 1',
- 'season_number': 1,
- 'season_id': '38',
- },
- 'params': {
- # Ensure the same video is downloaded whether the user is premium or not
- 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- # unfortunately we can get different URLs for the same formats
- # e.g. if we are using a "free" account so no dubs available
- # (so _remove_duplicate_formats is not effective)
- # so we use a dictionary as a workaround
- formats = {}
- for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
- 'https://www.animelab.com/player/%s/dubbed'):
- actual_url = language_option_url % display_id
- webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
-
- video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
- position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
-
- raw_data = video_collection[position]['videoEntry']
-
- video_id = str_or_none(raw_data['id'])
-
- # create a title from many sources (while grabbing other info)
- # TODO use more fallback sources to get some of these
- series = raw_data.get('showTitle')
- video_type = raw_data.get('videoEntryType', {}).get('name')
- episode_number = raw_data.get('episodeNumber')
- episode_name = raw_data.get('name')
-
- title_parts = (series, video_type, episode_number, episode_name)
- if None not in title_parts:
- title = '%s - %s %s - %s' % title_parts
- else:
- title = episode_name
-
- description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
-
- duration = int_or_none(raw_data.get('duration'))
-
- thumbnail_data = raw_data.get('images', [])
- thumbnails = []
- for thumbnail in thumbnail_data:
- for instance in thumbnail['imageInstances']:
- image_data = instance.get('imageInfo', {})
- thumbnails.append({
- 'id': str_or_none(image_data.get('id')),
- 'url': image_data.get('fullPath'),
- 'width': image_data.get('width'),
- 'height': image_data.get('height'),
- })
-
- season_data = raw_data.get('season', {}) or {}
- season = str_or_none(season_data.get('name'))
- season_number = int_or_none(season_data.get('seasonNumber'))
- season_id = str_or_none(season_data.get('id'))
-
- for video_data in raw_data['videoList']:
- current_video_list = {}
- current_video_list['language'] = video_data.get('language', {}).get('languageCode')
-
- is_hardsubbed = video_data.get('hardSubbed')
-
- for video_instance in video_data['videoInstances']:
- httpurl = video_instance.get('httpUrl')
- url = httpurl if httpurl else video_instance.get('rtmpUrl')
- if url is None:
- # this video format is unavailable to the user (not premium etc.)
- continue
-
- current_format = current_video_list.copy()
-
- format_id_parts = []
-
- format_id_parts.append(str_or_none(video_instance.get('id')))
-
- if is_hardsubbed is not None:
- if is_hardsubbed:
- format_id_parts.append('yeshardsubbed')
- else:
- format_id_parts.append('nothardsubbed')
-
- format_id_parts.append(current_format['language'])
-
- format_id = '_'.join([x for x in format_id_parts if x is not None])
-
- ext = determine_ext(url)
- if ext == 'm3u8':
- for format_ in self._extract_m3u8_formats(
- url, video_id, m3u8_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
- elif ext == 'mpd':
- for format_ in self._extract_mpd_formats(
- url, video_id, mpd_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
-
- current_format['url'] = url
- quality_data = video_instance.get('videoQuality')
- if quality_data:
- quality = quality_data.get('name') or quality_data.get('description')
- else:
- quality = None
-
- height = None
- if quality:
- height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-
- if height is None:
- self.report_warning('Could not get height of video')
- else:
- current_format['height'] = height
- current_format['format_id'] = format_id
-
- formats[current_format['format_id']] = current_format
-
- formats = list(formats.values())
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'series': series,
- 'episode': episode_name,
- 'episode_number': int_or_none(episode_number),
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'formats': formats,
- 'season': season,
- 'season_number': season_number,
- 'season_id': season_id,
- }
-
-
-class AnimeLabShowsIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://www.animelab.com/shows/attack-on-titan',
- 'info_dict': {
- 'id': '45',
- 'title': 'Attack on Titan',
- 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
- },
- 'playlist_count': 59,
- 'skip': 'All AnimeLab content requires authentication',
- }
-
- def _real_extract(self, url):
- _BASE_URL = 'http://www.animelab.com'
- _SHOWS_API_URL = '/api/videoentries/show/videos/'
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
-
- show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
- show_data = self._parse_json(show_data_str, display_id)
-
- show_id = str_or_none(show_data.get('id'))
- title = show_data.get('name')
- description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
-
- entries = []
- for season in show_data['seasons']:
- season_id = season['id']
- get_data = urlencode_postdata({
- 'seasonId': season_id,
- 'limit': 1000,
- })
- # despite using urlencode_postdata, we are sending a GET request
- target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
- response = self._download_webpage(
- target_url,
- None, 'Season id %s' % season_id)
-
- season_data = self._parse_json(response, display_id)
-
- for video_data in season_data['list']:
- entries.append(self.url_result(
- _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
- str_or_none(video_data.get('id')), video_data.get('name')
- ))
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
-
-# TODO implement myqueue
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index c85d5297d..1ca6ddc4d 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -1,36 +1,34 @@
-import re
import json
+import re
+import urllib.parse
+
from .common import InfoExtractor
-from .youtube import YoutubeIE, YoutubeBaseInfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_HTTPError
-)
+from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
+from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ HEADRequest,
bug_reports_message,
clean_html,
dict_get,
extract_attributes,
- ExtractorError,
get_element_by_id,
- HEADRequest,
int_or_none,
join_nonempty,
- KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
orderedSet,
parse_duration,
parse_qs,
- str_to_int,
str_or_none,
+ str_to_int,
traverse_obj,
try_get,
unified_strdate,
unified_timestamp,
+ url_or_none,
urlhandle_detect_ext,
- url_or_none
)
@@ -143,7 +141,7 @@ class ArchiveOrgIE(InfoExtractor):
return json.loads(extract_attributes(element)['value'])
def _real_extract(self, url):
- video_id = compat_urllib_parse_unquote_plus(self._match_id(url))
+ video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
# Archive.org metadata API doesn't clearly demarcate playlist entries
@@ -442,9 +440,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
'only_matching': True
},
]
- _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
- _YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE
+ _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
+ _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x)
+ (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
+ {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}'''
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet(
@@ -474,11 +473,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
-
def _extract_webpage_title(self, webpage):
page_title = self._html_extract_title(webpage, default='')
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
@@ -488,10 +482,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
def _extract_metadata(self, video_id, webpage):
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
- player_response = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial player response') or {}
+ player_response = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
+ video_id, default={})
+ initial_data = self._search_json(
+ self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
initial_data_video = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
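The `_extract_yt_initial_variable` helper removed here is superseded by the generic `_search_json` added in the common.py diff below; a self-contained toy model of the regex-then-parse behavior it encapsulates (not yt-dlp's actual code):

    import json
    import re

    def search_json(start_pattern, string, default=None):
        # Toy model: grab the first {...} blob after start_pattern and parse it.
        # The real helper also supports contains/end patterns and lenient JSON.
        mobj = re.search(rf'{start_pattern}\s*(?P<json>{{.+?}})\s*[;<]', string, re.DOTALL)
        if not mobj:
            return default
        try:
            return json.loads(mobj.group('json'))
        except ValueError:
            return default

    html = '<script>ytInitialData = {"contents": {}};</script>'
    print(search_json(r'ytInitialData\s*=', html))  # {'contents': {}}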
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index 96b134fa0..c80ce2233 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -90,7 +90,7 @@ class ArnesIE(InfoExtractor):
'timestamp': parse_iso8601(video.get('creationTime')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
+ 'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py
new file mode 100644
index 000000000..3f7b1e9f8
--- /dev/null
+++ b/yt_dlp/extractor/atscaleconf.py
@@ -0,0 +1,34 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AtScaleConfEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'
+
+ _TESTS = [{
+ 'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
+ 'playlist_mincount': 13,
+ 'info_dict': {
+ 'id': 'data-scale-spring-2022',
+ 'title': 'Data @Scale Spring 2022',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }, {
+ 'url': 'https://atscaleconference.com/events/video-scale-2021/',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'video-scale-2021',
+ 'title': 'Video @Scale 2021',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+
+ return self.playlist_from_matches(
+ re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
+ ie='Generic', playlist_id=id,
+ title=self._og_search_title(webpage), description=self._og_search_description(webpage))
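The findall above is the whole extraction: every `data-url` attribute pointing at a videos/ page is handed to the generic extractor as a playlist entry. Standalone, with invented sample markup:

    import re

    html = '<div class="talk" data-url="https://atscaleconference.com/videos/intro-talk"></div>'
    print(re.findall(
        r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', html))
    # ['https://atscaleconference.com/videos/intro-talk']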
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index 189d1224f..0105d9db8 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -1,8 +1,8 @@
import random
from .common import InfoExtractor
-from ..utils import ExtractorError, try_get, compat_str, str_or_none
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_str, compat_urllib_parse_unquote
+from ..utils import ExtractorError, str_or_none, try_get
class AudiusBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index d289f6be3..6fc938de9 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -41,7 +41,7 @@ class AWAANBaseIE(InfoExtractor):
'id': video_id,
'title': title,
'description': video_data.get('description_en') or video_data.get('description_ar'),
- 'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'),
+ 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live,
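Both this hunk and the arnes.py one above track a signature change in `utils.format_field`: the template now comes third, positionally. Roughly, the helper formats a value only when it is present (a simplified sketch, not the full yt-dlp signature):

    def format_field(obj, field=None, template='%s', default=''):
        # Simplified: the real helper also supports traversal and a func arg.
        value = obj if field is None else obj.get(field)
        return template % value if value is not None else default

    print(format_field('123', None, 'http://admin.mangomolo.com/analytics/%s'))
    # -> 'http://admin.mangomolo.com/analytics/123'
    print(format_field(None, None, 'http://admin.mangomolo.com/analytics/%s'))
    # -> ''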
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 9cb019a49..5ddeef7b5 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1,16 +1,12 @@
-import xml.etree.ElementTree
import functools
import itertools
import json
import re
+import urllib.error
+import xml.etree.ElementTree
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_error,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -391,7 +387,7 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
- if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+ if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
and e.exc_info[1].code in (403, 404)):
raise
fmts = []
diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py
index 8f9849d9b..5ae4b917a 100644
--- a/yt_dlp/extractor/bellmedia.py
+++ b/yt_dlp/extractor/bellmedia.py
@@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
- 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
+ 'md5': '3e5b8e38370741d5089da79161646635',
'info_dict': {
'id': '1403070',
'ext': 'flv',
@@ -32,6 +32,14 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
+ 'season_id': 73997,
+ 'season': '2018',
+ 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
+ 'tags': [],
+ 'categories': ['ETFs'],
+ 'season_number': 8,
+ 'duration': 272.038,
+ 'series': 'Market Call Tonight',
},
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index ead0dd88b..d695d9b49 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -677,6 +677,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
'vcodec': 'none'
}]
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
song = self._call_api('song/info', au_id)
title = song['title']
statistic = song.get('statistic') or {}
@@ -784,7 +789,8 @@ class BiliIntlBaseIE(InfoExtractor):
def json2srt(self, json):
data = '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
- for i, line in enumerate(json['body']) if line.get('content'))
+ for i, line in enumerate(traverse_obj(json, (
+ 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
return data
def _get_subtitles(self, *, ep_id=None, aid=None):
@@ -947,12 +953,11 @@ class BiliIntlIE(BiliIntlBaseIE):
video_id = ep_id or aid
webpage = self._download_webpage(url, video_id)
# Bstation layout
- initial_data = self._parse_json(self._search_regex(
- r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), video_id, fatal=False) or {}
- video_data = (
- traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
- or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
+ initial_data = (
+ self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
+ or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
+ video_data = traverse_obj(
+ initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict)
if season_id and not video_data:
# Non-Bstation layout, read through episode list
@@ -960,7 +965,7 @@ class BiliIntlIE(BiliIntlBaseIE):
video_data = traverse_obj(season_json,
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id),
expected_type=dict, get_all=False)
- return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
+ return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
class BiliIntlSeriesIE(BiliIntlBaseIE):
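The reworked lookup above leans on `traverse_obj` accepting several alternative paths and returning the first dict that matches; in plain Python the fallback behaves roughly like this (a simplification of the real helper):

    def first_dict(data, *paths):
        # Try each key path in order; return the first dict found.
        for path in paths:
            cur = data
            for key in path:
                cur = cur.get(key) if isinstance(cur, dict) else None
            if isinstance(cur, dict):
                return cur

    initial_data = {'UgcVideo': {'videoData': {'id': 42}}}
    print(first_dict(initial_data,
                     ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive')))
    # -> {'id': 42}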
diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py
index c0aaeae02..c842c342c 100644
--- a/yt_dlp/extractor/bloomberg.py
+++ b/yt_dlp/extractor/bloomberg.py
@@ -7,13 +7,11 @@ class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
- # The md5 checksum changes
+ 'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
'info_dict': {
- 'id': 'qurhIVlJSB6hzkVi229d8g',
+ 'id': 'V8cFcYMxTHaMcEiiYVr39A',
'ext': 'flv',
- 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:a8ba0302912d03d246979735c17d2761',
+ 'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
},
'params': {
'format': 'best[format_id^=hds]',
@@ -57,7 +55,7 @@ class BloombergIE(InfoExtractor):
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
- 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
formats = []
for stream in embed_info['streams']:
stream_url = stream.get('url')
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 936c34e15..a5412897d 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -600,9 +600,9 @@ class BrightcoveNewIE(AdobePassIE):
account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
policy_key_id = '%s_%s' % (account_id, player_id)
- policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key = self.cache.load('brightcove', policy_key_id)
policy_key_extracted = False
- store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
+ store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key():
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index cac3f1e9d..999b7bc53 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -304,13 +304,13 @@ class CBCGemIE(InfoExtractor):
def _get_claims_token(self, email, password):
if not self.claims_token_valid():
self._claims_token = self._new_claims_token(email, password)
- self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+ self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
return self._claims_token
def _real_initialize(self):
if self.claims_token_valid():
return
- self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
+ self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
""" Find a valid video url and convert it to the secret variant """
diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py
index b11e1f74e..1bc0f07f2 100644
--- a/yt_dlp/extractor/ccc.py
+++ b/yt_dlp/extractor/ccc.py
@@ -75,6 +75,7 @@ class CCCIE(InfoExtractor):
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
'duration': int_or_none(event_data.get('length')),
+ 'view_count': int_or_none(event_data.get('view_count')),
'tags': event_data.get('tags'),
'formats': formats,
}
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py
index 9b257bee9..6d01c60d5 100644
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -1,13 +1,9 @@
import codecs
-import re
import json
+import re
from .common import InfoExtractor
-from ..compat import (
- compat_chr,
- compat_ord,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_ord, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
float_or_none,
@@ -16,8 +12,8 @@ from ..utils import (
multipart_encode,
parse_duration,
random_birthday,
- urljoin,
try_get,
+ urljoin,
)
@@ -144,7 +140,7 @@ class CDAIE(InfoExtractor):
b = []
for c in a:
f = compat_ord(c)
- b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f))
+ b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
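The decryption loop above (now on the stdlib `chr`) is a fixed rotation over the 94 printable ASCII characters; isolated, it looks like this:

    def rot94(text, shift=14):
        # Shift each printable ASCII char (33..126) by `shift` within a
        # 94-symbol alphabet; everything else passes through unchanged.
        return ''.join(
            chr(33 + (ord(c) + shift) % 94) if 33 <= ord(c) <= 126 else c
            for c in text)

    print(rot94('abc'))  # '234'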
diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
index 7e8c0bfc9..e54d92a86 100644
--- a/yt_dlp/extractor/chingari.py
+++ b/yt_dlp/extractor/chingari.py
@@ -1,11 +1,11 @@
import itertools
import json
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
str_to_int,
url_or_none,
@@ -47,8 +47,8 @@ class ChingariBaseIE(InfoExtractor):
'id': id,
'extractor_key': ChingariIE.ie_key(),
'extractor': 'Chingari',
- 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
- 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
+ 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
+ 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
'duration': media_data.get('duration'),
'thumbnail': url_or_none(thumbnail),
'like_count': post_data.get('likeCount'),
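The dropped compat alias maps directly onto the stdlib; captions arrive percent-encoded with '+' for spaces, and unquote_plus undoes both:

    import urllib.parse
    print(urllib.parse.unquote_plus('Dancing+in+the+rain%21'))  # 'Dancing in the rain!'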
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index ebeca4395..4fbcfe203 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,6 +1,10 @@
import base64
import collections
+import getpass
import hashlib
+import http.client
+import http.cookiejar
+import http.cookies
import itertools
import json
import math
@@ -9,24 +13,12 @@ import os
import random
import sys
import time
+import urllib.parse
+import urllib.request
import xml.etree.ElementTree
-from ..compat import (
- compat_cookiejar_Cookie,
- compat_cookies_SimpleCookie,
- compat_etree_fromstring,
- compat_expanduser,
- compat_getpass,
- compat_http_client,
- compat_os_name,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
- re,
-)
+from ..compat import functools, re # isort: split
+from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@@ -35,6 +27,7 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
GeoUtils,
+ LenientJSONDecoder,
RegexNotFoundError,
UnsupportedError,
age_restricted,
@@ -384,6 +377,11 @@ class InfoExtractor:
release_year: Year (YYYY) when the album was released.
composer: Composer of the piece
+ The following fields should only be set for clips that should be cut from the original video:
+
+ section_start: Start time of the section in seconds
+ section_end: End time of the section in seconds
+
Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information.
@@ -610,8 +608,7 @@ class InfoExtractor:
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- self._downloader.write_debug(
- '[debug] Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
+ self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For')
return
# Path 2: bypassing based on country code
@@ -666,7 +663,7 @@ class InfoExtractor:
if hasattr(e, 'countries'):
kwargs['countries'] = e.countries
raise type(e)(e.orig_msg, **kwargs)
- except compat_http_client.IncompleteRead as e:
+ except http.client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -690,6 +687,14 @@ class InfoExtractor:
"""Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
+ @property
+ def cache(self):
+ return self._downloader.cache
+
+ @property
+ def cookiejar(self):
+ return self._downloader.cookiejar
+
def _initialize_pre_login(self):
""" Intialization before login. Redefine in subclasses."""
pass
@@ -717,7 +722,7 @@ class InfoExtractor:
@staticmethod
def __can_accept_status_code(err, expected_status):
- assert isinstance(err, compat_urllib_error.HTTPError)
+ assert isinstance(err, urllib.error.HTTPError)
if expected_status is None:
return False
elif callable(expected_status):
@@ -725,7 +730,14 @@ class InfoExtractor:
else:
return err.code in variadic(expected_status)
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ if isinstance(url_or_request, urllib.request.Request):
+ return update_Request(url_or_request, data=data, headers=headers, query=query)
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ return sanitized_Request(url_or_request, data, headers or {})
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
"""
Return the response handle.
@@ -753,21 +765,13 @@ class InfoExtractor:
# geo unrestricted country. We will do so once we encounter any
# geo restriction error.
if self._x_forwarded_for_ip:
- if 'X-Forwarded-For' not in headers:
- headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+ headers = (headers or {}).copy()
+ headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
- if isinstance(url_or_request, compat_urllib_request.Request):
- url_or_request = update_Request(
- url_or_request, data=data, headers=headers, query=query)
- else:
- if query:
- url_or_request = update_url_query(url_or_request, query)
- if data is not None or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers)
try:
- return self._downloader.urlopen(url_or_request)
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
except network_exceptions as err:
- if isinstance(err, compat_urllib_error.HTTPError):
+ if isinstance(err, urllib.error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
@@ -788,14 +792,42 @@ class InfoExtractor:
self.report_warning(errmsg)
return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ encoding=None, data=None, headers={}, query={}, expected_status=None):
"""
Return a tuple (page content as string, URL handle).
- See _download_webpage docstring for arguments specification.
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a urllib.request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether error should be considered fatal,
+ i.e. whether it should cause ExtractorError to be raised,
+ otherwise a warning will be reported and extraction continued
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows accepting failed HTTP requests (non-2xx
+ status code) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
"""
+
# Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
+ if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
@@ -850,140 +882,48 @@ class InfoExtractor:
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
+ def _request_dump_filename(self, url, video_id):
+ basen = f'{video_id}_{url}'
+ trim_length = self.get_param('trim_file_name') or 240
+ if len(basen) > trim_length:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:trim_length - len(h)] + h
+ filename = sanitize_filename(f'{basen}.dump', restricted=True)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = fR'\\?\{absfilepath}'
+ return filename
+
+ def __decode_webpage(self, webpage_bytes, encoding, headers):
+ if not encoding:
+ encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
+ try:
+ return webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ return webpage_bytes.decode('utf-8', 'replace')
+
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
- content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
- if not encoding:
- encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- if self.get_param('write_pages', False):
- basen = f'{video_id}_{urlh.geturl()}'
- trim_length = self.get_param('trim_file_name') or 240
- if len(basen) > trim_length:
- h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:trim_length - len(h)] + h
- raw_filename = basen + '.dump'
- filename = sanitize_filename(raw_filename, restricted=True)
- self.to_screen('Saving request to ' + filename)
- # Working around MAX_PATH limitation on Windows (see
- # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
- if compat_os_name == 'nt':
- absfilepath = os.path.abspath(filename)
- if len(absfilepath) > 259:
- filename = '\\\\?\\' + absfilepath
+ if self.get_param('write_pages'):
+ filename = self._request_dump_filename(urlh.geturl(), video_id)
+ self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
- try:
- content = webpage_bytes.decode(encoding, 'replace')
- except LookupError:
- content = webpage_bytes.decode('utf-8', 'replace')
-
+ content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
self.__check_blocked(content)
return content
- def _download_webpage(
- self, url_or_request, video_id, note=None, errnote=None,
- fatal=True, tries=1, timeout=5, encoding=None, data=None,
- headers={}, query={}, expected_status=None):
- """
- Return the data of the page as a string.
-
- Arguments:
- url_or_request -- plain text URL as a string or
- a compat_urllib_request.Requestobject
- video_id -- Video/playlist/item identifier (string)
-
- Keyword arguments:
- note -- note printed before downloading (string)
- errnote -- note printed in case of an error (string)
- fatal -- flag denoting whether error should be considered fatal,
- i.e. whether it should cause ExtractionError to be raised,
- otherwise a warning will be reported and extraction continued
- tries -- number of tries
- timeout -- sleep interval between tries
- encoding -- encoding for a page content decoding, guessed automatically
- when not explicitly specified
- data -- POST data (bytes)
- headers -- HTTP headers (dict)
- query -- URL query (dict)
- expected_status -- allows to accept failed HTTP requests (non 2xx
- status code) by explicitly specifying a set of accepted status
- codes. Can be any of the following entities:
- - an integer type specifying an exact failed status code to
- accept
- - a list or a tuple of integer types specifying a list of
- failed status codes to accept
- - a callable accepting an actual failed status code and
- returning True if it should be accepted
- Note that this argument does not affect success status codes (2xx)
- which are always accepted.
- """
-
- success = False
- try_count = 0
- while success is False:
- try:
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- success = True
- except compat_http_client.IncompleteRead as e:
- try_count += 1
- if try_count >= tries:
- raise e
- self._sleep(timeout, video_id)
- if res is False:
- return res
- else:
- content, _ = res
- return content
-
- def _download_xml_handle(
- self, url_or_request, video_id, note='Downloading XML',
- errnote='Unable to download XML', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- xml_string, urlh = res
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_xml(
- self, url_or_request, video_id,
- note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None,
- data=None, headers={}, query={}, expected_status=None):
- """
- Return the xml as an xml.etree.ElementTree.Element.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_xml_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
@@ -996,101 +936,126 @@ class InfoExtractor:
else:
self.report_warning(errmsg + str(ve))
- def _download_json_handle(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- json_string, urlh = res
- return self._parse_json(
- json_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_json(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- json_string = transform_source(json_string)
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs):
try:
- return json.loads(json_string, strict=False)
+ return json.loads(
+ json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % video_id
+ errmsg = f'{video_id}: Failed to parse JSON'
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
- self.report_warning(errmsg + str(ve))
+ self.report_warning(f'{errmsg}: {ve}')
def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
return self._parse_json(
data[data.find('{'):data.rfind('}') + 1],
video_id, transform_source, fatal)
- def _download_socket_json_handle(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
+ def __create_download_methods(name, parser, note, errnote, return_value):
+
+ def parse(ie, content, *args, **kwargs):
+ if parser is None:
+ return content
+ # parser is fetched by name so subclasses can override it
+ return getattr(ie, parser)(content, *args, **kwargs)
+
+ def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query, expected_status=expected_status)
+ if res is False:
+ return res
+ content, urlh = res
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh
+
+ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ if self.get_param('load_pages'):
+ url_or_request = self._create_request(url_or_request, data, headers, query)
+ filename = self._request_dump_filename(url_or_request.full_url, video_id)
+ self.to_screen(f'Loading request from {filename}')
+ try:
+ with open(filename, 'rb') as dumpf:
+ webpage_bytes = dumpf.read()
+ except OSError as e:
+ self.report_warning(f'Unable to load request from disk: {e}')
+ else:
+ content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
+ return parse(self, content, video_id, transform_source, fatal)
+ kwargs = {
+ 'note': note,
+ 'errnote': errnote,
+ 'transform_source': transform_source,
+ 'fatal': fatal,
+ 'encoding': encoding,
+ 'data': data,
+ 'headers': headers,
+ 'query': query,
+ 'expected_status': expected_status,
+ }
+ if parser is None:
+ kwargs.pop('transform_source')
+ # The method is fetched by name so subclasses can override _download_..._handle
+ res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
+ return res if res is False else res[0]
+
+ def impersonate(func, name, return_value):
+ func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}'
+ func.__doc__ = f'''
+ @param transform_source Apply this transformation before parsing
+ @returns {return_value}
+
+ See _download_webpage_handle docstring for other arguments specification
+ '''
+
+ impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)')
+ impersonate(download_content, f'_download_{name}', f'{return_value}')
+ return download_handle, download_content
+
+ _download_xml_handle, _download_xml = __create_download_methods(
+ 'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element')
+ _download_json_handle, _download_json = __create_download_methods(
+ 'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict')
+ _download_socket_json_handle, _download_socket_json = __create_download_methods(
+ 'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict')
+ __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1]
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- webpage, urlh = res
- return self._parse_socket_response_as_json(
- webpage, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_socket_json(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs):
"""
- Return the JSON object as a dict.
+ Return the data of the page as a string.
- See _download_webpage docstring for arguments specification.
+ Keyword arguments:
+ tries -- number of tries
+ timeout -- sleep interval between tries
+
+ See _download_webpage_handle docstring for other arguments specification.
"""
- res = self._download_socket_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
+
+ R''' # NB: These are unused; should they be deprecated?
+ if tries != 1:
+ self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage')
+ if timeout is NO_DEFAULT:
+ timeout = 5
+ else:
+ self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage')
+ '''
+
+ try_count = 0
+ while True:
+ try:
+ return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
+ except http.client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
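The `__create_download_methods` factory above collapses the four hand-written `_download_*`/`_download_*_handle` pairs that this patch deletes into generated closures. Its core shape, reduced to a sketch (the real version also handles load_pages, notes, and docstring impersonation):

    def make_download_methods(parser_name):
        # Build a (handle, content) pair; the parser is resolved by name at
        # call time, so a subclass overriding e.g. _parse_json still wins.
        def download_handle(self, url_or_request, video_id, **kwargs):
            res = self._download_webpage_handle(url_or_request, video_id, **kwargs)
            if res is False:
                return res
            content, urlh = res
            return getattr(self, parser_name)(content, video_id), urlh

        def download_content(self, url_or_request, video_id, **kwargs):
            res = download_handle(self, url_or_request, video_id, **kwargs)
            return res if res is False else res[0]

        return download_handle, download_content

    # e.g. _download_json_handle, _download_json = make_download_methods('_parse_json')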
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
- idstr = format_field(video_id, template='%s: ')
+ idstr = format_field(video_id, None, '%s: ')
msg = f'[{self.IE_NAME}] {idstr}{msg}'
if only_once:
if f'WARNING: {msg}' in self._printed_messages:
@@ -1136,7 +1101,7 @@ class InfoExtractor:
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
return
- msg += format_field(self._login_hint(method), template='. %s')
+ msg += format_field(self._login_hint(method), None, '. %s')
raise ExtractorError(msg, expected=True)
def raise_geo_restricted(
@@ -1228,6 +1193,33 @@ class InfoExtractor:
self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
+ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
+ contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
+ """Searches string for the JSON object specified by start_pattern"""
+ # NB: end_pattern is only used to reduce the size of the initial match
+ if default is NO_DEFAULT:
+ default, has_default = {}, False
+ else:
+ fatal, has_default = False, True
+
+ json_string = self._search_regex(
+ rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
+ string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+ if not json_string:
+ return default
+
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+ try:
+ return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+ except ExtractorError as e:
+ if fatal:
+ raise ExtractorError(
+ f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
+ elif not has_default:
+ self.report_warning(
+ f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
+ return default
+
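Call shape for the new helper inside an extractor, with an invented page for illustration; passing `default` flips `fatal` off:

    webpage = '<script>window.__DATA__ = {"id": "x1", "title": "Demo"};</script>'
    data = self._search_json(r'window\.__DATA__\s*=', webpage, 'page data', 'x1', default={})
    # -> {'id': 'x1', 'title': 'Demo'}; with `default` given the lookup is
    #    non-fatal, so a missing or malformed blob yields {} instead of raising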
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
@@ -1292,7 +1284,7 @@ class InfoExtractor:
if tfa is not None:
return tfa
- return compat_getpass('Type %s and press [Return]: ' % note)
+ return getpass.getpass('Type %s and press [Return]: ' % note)
# Helper functions for extracting OpenGraph info
@staticmethod
@@ -1343,7 +1335,7 @@ class InfoExtractor:
return self._og_search_property('url', html, **kargs)
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
- return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+ return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name)
@@ -1400,27 +1392,25 @@ class InfoExtractor:
return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
- json_ld_list = list(re.finditer(JSON_LD_RE, html))
- default = kwargs.get('default', NO_DEFAULT)
- # JSON-LD may be malformed and thus `fatal` should be respected.
- # At the same time `default` may be passed that assumes `fatal=False`
- # for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
- json_ld = []
- for mobj in json_ld_list:
- json_ld_item = self._parse_json(
- mobj.group('json_ld'), video_id, fatal=fatal)
- if not json_ld_item:
- continue
- if isinstance(json_ld_item, dict):
- json_ld.append(json_ld_item)
- elif isinstance(json_ld_item, (list, tuple)):
- json_ld.extend(json_ld_item)
- if json_ld:
- json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
- if json_ld:
- return json_ld
+ def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Yield all json ld objects in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ for mobj in re.finditer(JSON_LD_RE, html):
+ json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
+ for json_ld in variadic(json_ld_item):
+ if isinstance(json_ld, dict):
+ yield json_ld
+
+ def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
+ """Search for a video in any json ld in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ info = self._json_ld(
+ list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
+ video_id, fatal=fatal, expected_type=expected_type)
+ if info:
+ return info
if default is not NO_DEFAULT:
return default
elif fatal:
@@ -1430,7 +1420,7 @@ class InfoExtractor:
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
- if isinstance(json_ld, compat_str):
+ if isinstance(json_ld, str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
return {}
@@ -1451,6 +1441,10 @@ class InfoExtractor:
'ViewAction': 'view',
}
+ def is_type(e, *expected_types):
+ type = variadic(traverse_obj(e, '@type'))
+ return any(x in type for x in expected_types)
+
def extract_interaction_type(e):
interaction_type = e.get('interactionType')
if isinstance(interaction_type, dict):
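The new `is_type` normalizes schema.org `@type`, which may be a bare string or a list; standalone (without yt-dlp's `variadic`/`traverse_obj`):

    def is_type(e, *expected_types):
        # Accept '@type' given as either 'VideoObject' or ['VideoObject', ...].
        types = e.get('@type') if isinstance(e, dict) else None
        if not isinstance(types, (list, tuple)):
            types = [types]
        return any(t in types for t in expected_types)

    print(is_type({'@type': ['Clip', 'VideoObject']}, 'VideoObject'))  # True
    print(is_type({'@type': 'Movie'}, 'TVEpisode', 'Episode'))         # False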
@@ -1464,9 +1458,7 @@ class InfoExtractor:
if not isinstance(interaction_statistic, list):
return
for is_e in interaction_statistic:
- if not isinstance(is_e, dict):
- continue
- if is_e.get('@type') != 'InteractionCounter':
+ if not is_type(is_e, 'InteractionCounter'):
continue
interaction_type = extract_interaction_type(is_e)
if not interaction_type:
@@ -1503,22 +1495,23 @@ class InfoExtractor:
info['chapters'] = chapters
def extract_video_object(e):
- assert e['@type'] == 'VideoObject'
+ assert is_type(e, 'VideoObject')
author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnails': [{'url': url_or_none(url)}
- for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+ 'thumbnails': [{'url': url}
+ for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+ if url_or_none(url)],
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
# both types can have 'name' property(inherited from 'Thing' type). [1]
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
- 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
- 'filesize': float_or_none(e.get('contentSize')),
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
@@ -1534,13 +1527,12 @@ class InfoExtractor:
if at_top_level and set(e.keys()) == {'@context', '@graph'}:
traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
break
- item_type = e.get('@type')
- if expected_type is not None and expected_type != item_type:
+ if expected_type is not None and not is_type(e, expected_type):
continue
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None:
info['average_rating'] = rating
- if item_type in ('TVEpisode', 'Episode'):
+ if is_type(e, 'TVEpisode', 'Episode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': episode_name,
@@ -1550,37 +1542,39 @@ class InfoExtractor:
if not info.get('title') and episode_name:
info['title'] = episode_name
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
info.update({
'season': unescapeHTML(part_of_season.get('name')),
'season_number': int_or_none(part_of_season.get('seasonNumber')),
})
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
- elif item_type == 'Movie':
+ elif is_type(e, 'Movie'):
info.update({
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('dateCreated')),
})
- elif item_type in ('Article', 'NewsArticle'):
+ elif is_type(e, 'Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
- if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
+ if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
extract_video_object(e['video'][0])
- elif item_type == 'VideoObject':
+ elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
+ extract_video_object(e['subjectOf'][0])
+ elif is_type(e, 'VideoObject'):
extract_video_object(e)
if expected_type is None:
continue
else:
break
video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ if is_type(video, 'VideoObject'):
extract_video_object(video)
if expected_type is None:
continue
@@ -1597,15 +1591,13 @@ class InfoExtractor:
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
- def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
- ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
- # not all website do this, but it can be changed
- # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
+ """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
+ FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
- (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
- r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
- webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+ (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
+ webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal)
args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
@@ -1613,7 +1605,8 @@ class InfoExtractor:
if val in ('undefined', 'void 0'):
args[key] = 'null'
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+ return traverse_obj(ret, traverse) or {}
@staticmethod
def _hidden_inputs(html):
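
The rewritten _search_nuxt_data regex targets the IIFE that Nuxt emits, of the shape window.__NUXT__=(function(a,b){return {...}}(val_a, val_b)): the argument names are zipped with the call-site values and substituted while js_to_json converts the body. A hedged illustration of what FUNCTION_RE captures (markup and values are made up):

    import re

    FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
    html = ("<script>window.__NUXT__=(function(a,b){return {\"data\":[{\"title\":a,\"id\":b}]};}"
            "('My video',1234));</script>")
    m = re.search(r'<script>\s*window\.__NUXT__=' + FUNCTION_RE, html)
    args = dict(zip(m.group('arg_keys').split(','), m.group('arg_vals').split(',')))
    # args == {'a': "'My video'", 'b': '1234'}; passing these as vars= to
    # js_to_json inlines them while the body is converted to valid JSON
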
@@ -2166,7 +2159,7 @@ class InfoExtractor:
]), m3u8_doc)
def format_url(url):
- return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
+ return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2539,7 +2532,7 @@ class InfoExtractor:
})
continue
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@@ -2562,7 +2555,7 @@ class InfoExtractor:
'plugin': 'flowplayer-3.2.0.1',
}
f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -2803,13 +2796,18 @@ class InfoExtractor:
mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
- codecs = parse_codecs(representation_attrib.get('codecs', ''))
+ codec_str = representation_attrib.get('codecs', '')
+ # Some kind of binary subtitle found in some youtube livestreams
+ if mime_type == 'application/x-rawcc':
+ codecs = {'scodec': codec_str}
+ else:
+ codecs = parse_codecs(codec_str)
if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg':
content_type = mime_type
- elif codecs['vcodec'] != 'none':
+ elif codecs.get('vcodec', 'none') != 'none':
content_type = 'video'
- elif codecs['acodec'] != 'none':
+ elif codecs.get('acodec', 'none') != 'none':
content_type = 'audio'
elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
@@ -2827,7 +2825,7 @@ class InfoExtractor:
if re.match(r'^https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
- base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+ base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
@@ -3097,7 +3095,7 @@ class InfoExtractor:
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+ track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
@@ -3116,7 +3114,7 @@ class InfoExtractor:
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
@@ -3184,7 +3182,8 @@ class InfoExtractor:
return f
return {}
- def _media_formats(src, cur_media_type, type_info={}):
+ def _media_formats(src, cur_media_type, type_info=None):
+ type_info = type_info or {}
full_url = absolute_url(src)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
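
Replacing the type_info={} default is the standard fix for Python's shared-mutable-default pitfall: the default object is created once, at function definition time, so a mutation from one call leaks into every later call. A standalone illustration:

    def bad(x, acc=[]):        # one list object shared across all calls
        acc.append(x)
        return acc

    def good(x, acc=None):     # a fresh list per call unless one is passed in
        acc = acc or []
        acc.append(x)
        return acc

    assert bad(1) == [1]
    assert bad(2) == [1, 2]    # state leaked from the first call
    assert good(1) == [1]
    assert good(2) == [2]
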
@@ -3202,6 +3201,7 @@ class InfoExtractor:
formats = [{
'url': full_url,
'vcodec': 'none' if cur_media_type == 'audio' else None,
+ 'ext': ext,
}]
return is_plain_url, formats
@@ -3228,7 +3228,8 @@ class InfoExtractor:
media_attributes = extract_attributes(media_tag)
src = strip_or_none(media_attributes.get('src'))
if src:
- _, formats = _media_formats(src, media_type)
+ f = parse_content_type(media_attributes.get('type'))
+ _, formats = _media_formats(src, media_type, f)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
@@ -3357,7 +3358,7 @@ class InfoExtractor:
return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
+ query = urllib.parse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
@@ -3463,7 +3464,7 @@ class InfoExtractor:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
+ if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
@@ -3536,7 +3537,7 @@ class InfoExtractor:
            # Often no height is provided, but there is a label in
            # a format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = {
'url': source_url,
@@ -3588,17 +3589,15 @@ class InfoExtractor:
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
+ self.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
- """ Return a compat_cookies_SimpleCookie with the cookies for the url """
- req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies_SimpleCookie(req.get_header('Cookie'))
+ """ Return a http.cookies.SimpleCookie with the cookies for the url """
+ return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
@@ -3742,7 +3741,7 @@ class InfoExtractor:
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
- @property
+ @functools.cached_property
def _cookies_passed(self):
"""Whether cookies have been passed to YoutubeDL"""
return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None
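
Moving _cookies_passed from @property to @functools.cached_property trades a repeated get_param() lookup for a one-time computation memoized on the instance, which is safe here because the cookie options cannot change after the extractor is initialized. The mechanics, in isolation:

    import functools

    class Example:
        @functools.cached_property
        def value(self):
            print('computed')   # runs only on the first access
            return 42

    e = Example()
    e.value                     # prints 'computed'
    e.value                     # served from e.__dict__, no recomputation
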
@@ -3764,10 +3763,10 @@ class InfoExtractor:
return headers
def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py
index e8f19b9e0..2f93e8ea5 100644
--- a/yt_dlp/extractor/commonprotocols.py
+++ b/yt_dlp/extractor/commonprotocols.py
@@ -1,5 +1,6 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import compat_urlparse
class RtmpIE(InfoExtractor):
@@ -23,7 +24,7 @@ class RtmpIE(InfoExtractor):
'formats': [{
'url': url,
'ext': 'flv',
- 'format_id': compat_urlparse.urlparse(url).scheme,
+ 'format_id': urllib.parse.urlparse(url).scheme,
}],
}
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index bb1dbbaad..6877e1a3f 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -1,19 +1,20 @@
import base64
-import re
import json
-import zlib
-
+import re
+import urllib.request
import xml.etree.ElementTree
+import zlib
from hashlib import sha1
-from math import pow, sqrt, floor
+from math import floor, pow, sqrt
+
from .common import InfoExtractor
from .vrv import VRVBaseIE
+from ..aes import aes_cbc_decrypt
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_str,
compat_urllib_parse_urlencode,
- compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -22,8 +23,8 @@ from ..utils import (
extract_attributes,
float_or_none,
format_field,
- intlist_to_bytes,
int_or_none,
+ intlist_to_bytes,
join_nonempty,
lowercase_escape,
merge_dicts,
@@ -34,9 +35,6 @@ from ..utils import (
try_get,
xpath_text,
)
-from ..aes import (
- aes_cbc_decrypt,
-)
class CrunchyrollBaseIE(InfoExtractor):
@@ -259,7 +257,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
}
def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ request = (url_or_request if isinstance(url_or_request, urllib.request.Request)
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
@@ -728,11 +726,12 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
headers={
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
})
- bucket = policy_response['cms']['bucket']
+ cms = traverse_obj(policy_response, 'cms_beta', 'cms')
+ bucket = cms['bucket']
params = {
- 'Policy': policy_response['cms']['policy'],
- 'Signature': policy_response['cms']['signature'],
- 'Key-Pair-Id': policy_response['cms']['key_pair_id']
+ 'Policy': cms['policy'],
+ 'Signature': cms['signature'],
+ 'Key-Pair-Id': cms['key_pair_id']
}
locale = traverse_obj(initial_state, ('localization', 'locale'))
if locale:
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index 5b76b29ff..a105b6ce2 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -1,12 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- urlencode_postdata,
- compat_str,
- ExtractorError,
-)
+from ..compat import compat_str
+from ..utils import ExtractorError, int_or_none, urlencode_postdata
class CuriosityStreamBaseIE(InfoExtractor):
@@ -23,6 +19,11 @@ class CuriosityStreamBaseIE(InfoExtractor):
def _call_api(self, path, video_id, query=None):
headers = {}
+ if not self._auth_token:
+ auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
+ if auth_cookie:
+ self.write_debug('Obtained auth_token cookie')
+ self._auth_token = auth_cookie.value
if self._auth_token:
headers['X-Auth-Token'] = self._auth_token
result = self._download_json(
@@ -45,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://app.curiositystream.com/video/2',
+ 'url': 'http://app.curiositystream.com/video/2',
'info_dict': {
'id': '2',
'ext': 'mp4',
diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py
index 07239f39c..9b83264ee 100644
--- a/yt_dlp/extractor/cwtv.py
+++ b/yt_dlp/extractor/cwtv.py
@@ -91,4 +91,5 @@ class CWTVIE(InfoExtractor):
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
+ 'thumbnail': video_data.get('large_thumbnail')
}
diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py
index 3b090d5e0..46438891f 100644
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -5,13 +5,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
age_restricted,
clean_html,
- ExtractorError,
int_or_none,
- OnDemandPagedList,
+ traverse_obj,
try_get,
unescapeHTML,
+ unsmuggle_url,
urlencode_postdata,
)
@@ -220,6 +222,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
return urls
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id:
@@ -252,7 +255,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
metadata = self._download_json(
'https://www.dailymotion.com/player/metadata/video/' + xid,
xid, 'Downloading metadata JSON',
- query={'app': 'com.dailymotion.neon'})
+ query=traverse_obj(smuggled_data, 'query') or {'app': 'com.dailymotion.neon'})
error = metadata.get('error')
if error:
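
unsmuggle_url() is the receiving end of the smuggle_url()/unsmuggle_url() pair from yt_dlp.utils: an embedding extractor can append a JSON fragment to the URL, and here that is used to override the metadata query. A minimal round trip (the query value is illustrative):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    url = smuggle_url('https://www.dailymotion.com/video/x7xxxxx',
                      {'query': {'app': 'com.example.embed'}})
    url, smuggled_data = unsmuggle_url(url)
    assert smuggled_data == {'query': {'app': 'com.example.embed'}}

When nothing was smuggled, unsmuggle_url() returns None for the data, which is why the extractor falls back through traverse_obj(smuggled_data, 'query') or {...}.
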
diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py
new file mode 100644
index 000000000..1f27797ad
--- /dev/null
+++ b/yt_dlp/extractor/dailywire.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ join_nonempty,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DailyWireBaseIE(InfoExtractor):
+ _JSON_PATH = {
+ 'episode': ('props', 'pageProps', 'episodeData', 'episode'),
+ 'videos': ('props', 'pageProps', 'videoData', 'video'),
+ 'podcasts': ('props', 'pageProps', 'episode'),
+ }
+
+ def _get_json(self, url):
+ sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
+ json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug)
+ return slug, traverse_obj(json_data, self._JSON_PATH[sites_type])
+
+
+class DailyWireIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/episode/1-fauci',
+ 'info_dict': {
+ 'id': 'ckzsl50xnqpy30850in3v4bu7',
+ 'ext': 'mp4',
+ 'display_id': '1-fauci',
+ 'title': '1. Fauci',
+ 'description': 'md5:9df630347ef85081b7e97dd30bc22853',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
+ 'creator': 'Caroline Roberts',
+ 'series_id': 'ckzplm0a097fn0826r2vc3j7h',
+ 'series': 'China: The Enemy Within',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
+ 'info_dict': {
+ 'id': 'cl0ngbaalplc80894sfdo9edf',
+ 'ext': 'mp3',
+ 'display_id': 'ep-124-bill-maher',
+ 'title': 'Ep. 124 - Bill Maher',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
+ 'creator': 'Caroline Roberts',
+ 'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
+ 'series_id': 'cjzvep7270hp00786l9hwccob',
+ 'series': 'The Sunday Special',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/videos/the-hyperions',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ urls = traverse_obj(
+ episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
+
+ formats, subtitles = [], {}
+ for url in urls:
+ if determine_ext(url) != 'm3u8':
+ formats.append({'url': url})
+ continue
+ format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug)
+ formats.extend(format_)
+ self._merge_subtitles(subs_, target=subtitles)
+ self._sort_formats(formats)
+ return {
+ 'id': episode_info['id'],
+ 'display_id': slug,
+ 'title': traverse_obj(episode_info, 'title', 'name'),
+ 'description': episode_info.get('description'),
+ 'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'is_live': episode_info.get('isLive'),
+ 'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series_id': traverse_obj(episode_info, ('show', 'id')),
+ 'series': traverse_obj(episode_info, ('show', 'name')),
+ }
+
+
+class DailyWirePodcastIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
+ 'info_dict': {
+ 'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
+ 'ext': 'm4a',
+ 'display_id': 'get-ready-for-recession-6-15-22',
+ 'title': 'Get Ready for Recession | 6.15.22',
+ 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
+ 'duration': 900.117667,
+ }
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
+
+ return {
+ 'id': episode_info['id'],
+ 'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
+ 'display_id': slug,
+ 'title': episode_info.get('title'),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'thumbnail': episode_info.get('thumbnail'),
+ 'description': episode_info.get('description'),
+ }
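
Both extractors share _get_json(), which reads the page's Next.js __NEXT_DATA__ blob via _search_nextjs_data() and then walks the per-page path from _JSON_PATH with traverse_obj. A sketch of the blob shape those paths assume (values are illustrative, taken from the first test case):

    from yt_dlp.utils import traverse_obj

    json_data = {'props': {'pageProps': {'episodeData': {'episode': {
        'id': 'ckzsl50xnqpy30850in3v4bu7', 'title': '1. Fauci'}}}}}
    episode = traverse_obj(json_data, ('props', 'pageProps', 'episodeData', 'episode'))
    assert episode['title'] == '1. Fauci'
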
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index c891ad0a6..3813a51fe 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -86,7 +86,7 @@ class DigitalConcertHallIE(InfoExtractor):
})
m3u8_url = traverse_obj(
- stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
+ stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 6ac0c713a..0d12513b2 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -53,8 +53,8 @@ class DropboxIE(InfoExtractor):
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
- json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
- info_json = self._parse_json(json_string, video_id).get('props')
+ info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
+ contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
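
The new _search_json() call replaces the hand-rolled regex plus _parse_json pair: the first argument anchors the search just before the JSON object, contains_pattern requires that the matched object mention "preview", and end_pattern requires the closing parenthesis of the mountComponent call. An illustrative page fragment it is aimed at (structure assumed from those patterns; field names other than "preview" are made up):

    webpage = 'InitReact.mountComponent(Comp, {"props": {"preview": {"file": {}}}})'
    # start pattern   : InitReact\.mountComponent\(.*?,  -> anchors before the object
    # contains_pattern: .+?"preview".+?                  -> object must contain "preview"
    # end_pattern     : \)                               -> object is followed by ')'
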
diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py
index 475825eb8..e280b1c9f 100644
--- a/yt_dlp/extractor/dropout.py
+++ b/yt_dlp/extractor/dropout.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
@@ -96,11 +96,12 @@ class DropoutIE(InfoExtractor):
def _login(self, display_id):
username, password = self._get_login_info()
- if not (username and password):
- self.raise_login_required(method='password')
+ if not username:
+ return True
response = self._download_webpage(
- self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
+ self._LOGIN_URL, display_id, note='Logging in', fatal=False,
+ data=urlencode_postdata({
'email': username,
'password': password,
'authenticity_token': self._get_authenticity_token(display_id),
@@ -110,19 +111,25 @@ class DropoutIE(InfoExtractor):
user_has_subscription = self._search_regex(
r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
if user_has_subscription.lower() == 'true':
- return response
+ return
elif user_has_subscription.lower() == 'false':
- raise ExtractorError('Account is not subscribed')
+ return 'Account is not subscribed'
else:
- raise ExtractorError('Incorrect username/password')
+ return 'Incorrect username/password'
def _real_extract(self, url):
display_id = self._match_id(url)
- try:
- self._login(display_id)
- webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
- finally:
- self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
+
+ webpage = None
+ if self._get_cookies('https://www.dropout.tv').get('_session'):
+ webpage = self._download_webpage(url, display_id)
+ if not webpage or '<div id="watch-unauthorized"' in webpage:
+ login_err = self._login(display_id)
+ webpage = self._download_webpage(url, display_id)
+ if login_err and '<div id="watch-unauthorized"' in webpage:
+ if login_err is True:
+ self.raise_login_required(method='any')
+ raise ExtractorError(login_err, expected=True)
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
thumbnail = self._og_search_thumbnail(webpage)
@@ -137,7 +144,7 @@ class DropoutIE(InfoExtractor):
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
- 'url': embed_url,
+ 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
index 24403842d..fb0546cae 100644
--- a/yt_dlp/extractor/duboku.py
+++ b/yt_dlp/extractor/duboku.py
@@ -51,31 +51,39 @@ def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, e
class DubokuIE(InfoExtractor):
IE_NAME = 'duboku'
- IE_DESC = 'www.duboku.co'
+ IE_DESC = 'www.duboku.io'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/vodplay/1575-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
'info_dict': {
'id': '1575-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '白色月光',
'title': 'contains:白色月光',
'season_number': 1,
'episode_number': 1,
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
+ 'episode': 'Episode 1',
},
'params': {
'skip_download': 'm3u8 download',
},
}, {
- 'url': 'https://www.duboku.co/vodplay/1588-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
'info_dict': {
'id': '1588-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '亲爱的自己',
- 'title': 'contains:预告片',
+ 'title': 'contains:第1集',
'season_number': 1,
'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
},
'params': {
'skip_download': 'm3u8 download',
@@ -91,7 +99,7 @@ class DubokuIE(InfoExtractor):
season_id = temp[1]
episode_id = temp[2]
- webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id
+ webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id
webpage_html = self._download_webpage(webpage_url, video_id)
# extract video url
@@ -124,12 +132,13 @@ class DubokuIE(InfoExtractor):
data_from = player_data.get('from')
# if it is an embedded iframe, maybe it's an external source
+ headers = {'Referer': webpage_url}
if data_from == 'iframe':
# use _type url_transparent to retain the meaningful details
# of the video.
return {
'_type': 'url_transparent',
- 'url': smuggle_url(data_url, {'http_headers': {'Referer': webpage_url}}),
+ 'url': smuggle_url(data_url, {'http_headers': headers}),
'id': video_id,
'title': title,
'series': series_title,
@@ -139,7 +148,7 @@ class DubokuIE(InfoExtractor):
'episode_id': episode_id,
}
- formats = self._extract_m3u8_formats(data_url, video_id, 'mp4')
+ formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
return {
'id': video_id,
@@ -150,36 +159,29 @@ class DubokuIE(InfoExtractor):
'episode_number': int_or_none(episode_id),
'episode_id': episode_id,
'formats': formats,
- 'http_headers': {'Referer': 'https://www.duboku.co/static/player/videojs.html'}
+ 'http_headers': headers
}
class DubokuPlaylistIE(InfoExtractor):
IE_NAME = 'duboku:list'
- IE_DESC = 'www.duboku.co entire series'
+ IE_DESC = 'www.duboku.io entire series'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/voddetail/)(?P<id>[0-9]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/voddetail/1575.html',
+ 'url': 'https://w.duboku.io/voddetail/1575.html',
'info_dict': {
'id': 'startswith:1575',
'title': '白色月光',
},
'playlist_count': 12,
}, {
- 'url': 'https://www.duboku.co/voddetail/1554.html',
+ 'url': 'https://w.duboku.io/voddetail/1554.html',
'info_dict': {
'id': 'startswith:1554',
'title': '以家人之名',
},
'playlist_mincount': 30,
- }, {
- 'url': 'https://www.duboku.co/voddetail/1554.html#playlist2',
- 'info_dict': {
- 'id': '1554#playlist2',
- 'title': '以家人之名',
- },
- 'playlist_mincount': 27,
}]
def _real_extract(self, url):
@@ -189,7 +191,7 @@ class DubokuPlaylistIE(InfoExtractor):
series_id = mobj.group('id')
fragment = compat_urlparse.urlparse(url).fragment
- webpage_url = 'https://www.duboku.co/voddetail/%s.html' % series_id
+ webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id
webpage_html = self._download_webpage(webpage_url, series_id)
# extract title
@@ -234,6 +236,6 @@ class DubokuPlaylistIE(InfoExtractor):
# return url results
return self.playlist_result([
self.url_result(
- compat_urlparse.urljoin('https://www.duboku.co', x['href']),
+ compat_urlparse.urljoin('https://w.duboku.io', x['href']),
ie=DubokuIE.ie_key(), video_title=x.get('title'))
for x in playlist], series_id + '#' + playlist_id, title)
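
The recurring point in this patch is that the Referer has to reach both the manifest request and the later fragment downloads: headers= feeds the HTTP request made by _extract_m3u8_formats() itself, while http_headers in the returned info dict is what the downloader applies per fragment. In outline (names mirror the patch above):

    headers = {'Referer': webpage_url}
    formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
    return {
        'id': video_id,
        'formats': formats,
        'http_headers': headers,  # applied by the downloader to each fragment request
    }
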
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index 507f0a5c1..276543653 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -119,7 +119,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
class ERTFlixIE(ERTFlixBaseIE):
IE_NAME = 'ertflix'
IE_DESC = 'ERTFLIX videos'
- _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+ _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
_TESTS = [{
'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
@@ -171,6 +171,9 @@ class ERTFlixIE(ERTFlixBaseIE):
'title': 'Το δίκτυο',
},
'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+ 'only_matching': True,
}]
def _extract_episode(self, episode):
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index 8fad70e6b..451148636 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -1,8 +1,11 @@
+import base64
+import json
import re
+import urllib.parse
+from .adobepass import AdobePassIE
from .common import InfoExtractor
from .once import OnceIE
-from ..compat import compat_str
from ..utils import (
determine_ext,
dict_get,
@@ -24,7 +27,6 @@ class ESPNIE(OnceIE):
(?:
(?:
video/(?:clip|iframe/twitter)|
- watch/player
)
(?:
.*?\?.*?\bid=|
@@ -47,6 +49,8 @@ class ESPNIE(OnceIE):
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
+ 'duration': 1302,
+ 'thumbnail': r're:https://.+\.jpg',
},
'params': {
'skip_download': True,
@@ -72,15 +76,6 @@ class ESPNIE(OnceIE):
'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
'only_matching': True,
}, {
- 'url': 'http://www.espn.com/watch/player?id=19141491',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player/_/id/19141491',
- 'only_matching': True,
- }, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
@@ -98,7 +93,13 @@ class ESPNIE(OnceIE):
}, {
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
'only_matching': True,
- }]
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+ }, ]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -116,7 +117,7 @@ class ESPNIE(OnceIE):
for source_id, source in source.items():
if source_id == 'alert':
continue
- elif isinstance(source, compat_str):
+ elif isinstance(source, str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
@@ -196,7 +197,7 @@ class ESPNArticleIE(InfoExtractor):
@classmethod
def suitable(cls, url):
- return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+ return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -277,3 +278,119 @@ class ESPNCricInfoIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class WatchESPNIE(AdobePassIE):
+ _VALID_URL = r'https://www.espn.com/watch/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.espn.com/watch/player/_/id/ba7d17da-453b-4697-bf92-76a99f61642b',
+ 'info_dict': {
+ 'id': 'ba7d17da-453b-4697-bf92-76a99f61642b',
+ 'ext': 'mp4',
+ 'title': 'Serbia vs. Turkey',
+ 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/ba7d17da-453b-4697-bf92-76a99f61642b/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/watch/player/_/id/4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
+ 'info_dict': {
+ 'id': '4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
+ 'ext': 'mp4',
+ 'title': 'Real Madrid vs. Real Betis (LaLiga)',
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
+
+ def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
+ if 'Authorization' not in headers:
+ headers['Authorization'] = f'Bearer {self._API_KEY}'
+ parse = urllib.parse.urlencode if path == 'token' else json.dumps
+ return self._download_json(
+ f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
+ video_id)['playbackState']
+
+        # ESPN+ subscription required, authenticated via cookies
+ if 'DTC' in video_data.get('sourceId'):
+ cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
+ if not cookie:
+ self.raise_login_required(method='cookies')
+
+ assertion = self._call_bamgrid_api(
+ 'devices', video_id,
+ headers={'Content-Type': 'application/json; charset=UTF-8'},
+ payload={
+ 'deviceFamily': 'android',
+ 'applicationRuntime': 'android',
+ 'deviceProfile': 'tv',
+ 'attributes': {},
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ assertion = self._call_bamgrid_api(
+ 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
+ headers={
+ 'Authorization': token,
+ 'Content-Type': 'application/json; charset=UTF-8'
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ playback = self._download_json(
+ video_data['videoHref'].format(scenario='browser~ssai'), video_id,
+ headers={
+ 'Accept': 'application/vnd.media-service+json; version=5',
+ 'Authorization': token
+ })
+ m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}
+
+ # No login required
+ elif video_data.get('sourceId') == 'ESPN_FREE':
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id)
+ m3u8_url, headers = asset['stream'], {}
+
+ # TV Provider required
+ else:
+ resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
+ auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
+
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id, data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
+ m3u8_url, headers = asset['stream'], {}
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': video_data.get('posterHref'),
+ 'http_headers': headers,
+ }
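
The DTC branch above is two rounds of the OAuth 2.0 token-exchange grant (urn:ietf:params:oauth:grant-type:token-exchange) against the BAMTech API: register a throwaway device, trade its assertion for a device token, use the site cookie's id_token to obtain an account grant, and trade that for the account token that finally authorizes the playback request. Condensed pseudocode (post() is a hypothetical helper standing in for _call_bamgrid_api):

    assertion = post('devices', device_payload)['assertion']        # register a device
    token = post('token', {'subject_token': assertion,
                           'subject_token_type': '...token-type:device',
                           'grant_type': '...token-exchange'})['access_token']
    assertion = post('accounts/grant', {'id_token': id_token},      # bind the account
                     auth=token)['assertion']
    token = post('token', {'subject_token': assertion,
                           'subject_token_type': '...token-type:account',
                           'grant_type': '...token-exchange'})['access_token']
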
diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
index a1b8e9bc9..5aba21ba7 100644
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@@ -19,9 +19,10 @@ class ExpressenIE(InfoExtractor):
'''
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
- 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+ 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
'info_dict': {
- 'id': '8690962',
+ 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
+ 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
'ext': 'mp4',
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
@@ -64,7 +65,7 @@ class ExpressenIE(InfoExtractor):
display_id, transform_source=unescapeHTML)
info = extract_data('video-tracking-info')
- video_id = info['videoId']
+ video_id = info['contentId']
data = extract_data('article-data')
stream = data['stream']
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 9c5a5f482..32818a024 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1,2175 +1,23 @@
-# flake8: noqa: F401
+import contextlib
+import os
-from .abc import (
- ABCIE,
- ABCIViewIE,
- ABCIViewShowSeriesIE,
-)
-from .abcnews import (
- AbcNewsIE,
- AbcNewsVideoIE,
-)
-from .abcotvs import (
- ABCOTVSIE,
- ABCOTVSClipsIE,
-)
-from .abematv import (
- AbemaTVIE,
- AbemaTVTitleIE,
-)
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
- ACastIE,
- ACastChannelIE,
-)
-from .adn import ADNIE
-from .adobeconnect import AdobeConnectIE
-from .adobetv import (
- AdobeTVEmbedIE,
- AdobeTVIE,
- AdobeTVShowIE,
- AdobeTVChannelIE,
- AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import (
- AENetworksIE,
- AENetworksCollectionIE,
- AENetworksShowIE,
- HistoryTopicIE,
- HistoryPlayerIE,
- BiographyIE,
-)
-from .afreecatv import (
- AfreecaTVIE,
- AfreecaTVLiveIE,
- AfreecaTVUserIE,
-)
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .amara import AmaraIE
-from .alura import (
- AluraIE,
- AluraCourseIE
-)
-from .amcnetworks import AMCNetworksIE
-from .animelab import (
- AnimeLabIE,
- AnimeLabShowsIE,
-)
-from .amazon import AmazonStoreIE
-from .americastestkitchen import (
- AmericasTestKitchenIE,
- AmericasTestKitchenSeasonIE,
-)
-from .animeondemand import AnimeOnDemandIE
-from .anvato import AnvatoIE
-from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .alsace20tv import (
- Alsace20TVIE,
- Alsace20TVEmbedIE,
-)
-from .apa import APAIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
- AppleTrailersIE,
- AppleTrailersSectionIE,
-)
-from .applepodcasts import ApplePodcastsIE
-from .archiveorg import (
- ArchiveOrgIE,
- YoutubeWebArchiveIE,
-)
-from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
-from .ard import (
- ARDBetaMediathekIE,
- ARDIE,
- ARDMediathekIE,
-)
-from .arte import (
- ArteTVIE,
- ArteTVEmbedIE,
- ArteTVPlaylistIE,
- ArteTVCategoryIE,
-)
-from .arnes import ArnesIE
-from .asiancrush import (
- AsianCrushIE,
- AsianCrushPlaylistIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .atvat import ATVAtIE
-from .audimedia import AudiMediaIE
-from .audioboom import AudioBoomIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .audius import (
- AudiusIE,
- AudiusTrackIE,
- AudiusPlaylistIE,
- AudiusProfileIE,
-)
-from .awaan import (
- AWAANIE,
- AWAANVideoIE,
- AWAANLiveIE,
- AWAANSeasonIE,
-)
-from .azmedien import AZMedienIE
-from .baidu import BaiduVideoIE
-from .banbye import (
- BanByeIE,
- BanByeChannelIE,
-)
-from .bandaichannel import BandaiChannelIE
-from .bandcamp import (
- BandcampIE,
- BandcampAlbumIE,
- BandcampWeeklyIE,
- BandcampUserIE,
-)
-from .bannedvideo import BannedVideoIE
-from .bbc import (
- BBCCoUkIE,
- BBCCoUkArticleIE,
- BBCCoUkIPlayerEpisodesIE,
- BBCCoUkIPlayerGroupIE,
- BBCCoUkPlaylistIE,
- BBCIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
-from .beatport import BeatportIE
-from .bet import BetIE
-from .bfi import BFIPlayerIE
-from .bfmtv import (
- BFMTVIE,
- BFMTVLiveIE,
- BFMTVArticleIE,
-)
-from .bibeltv import BibelTVIE
-from .bigflix import BigflixIE
-from .bigo import BigoIE
-from .bild import BildIE
-from .bilibili import (
- BiliBiliIE,
- BiliBiliSearchIE,
- BilibiliCategoryIE,
- BiliBiliBangumiIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
- BiliBiliPlayerIE,
- BilibiliChannelIE,
- BiliIntlIE,
- BiliIntlSeriesIE,
- BiliLiveIE,
-)
-from .biobiochiletv import BioBioChileTVIE
-from .bitchute import (
- BitChuteIE,
- BitChuteChannelIE,
-)
-from .bitwave import (
- BitwaveReplayIE,
- BitwaveStreamIE,
-)
-from .biqle import BIQLEIE
-from .blackboardcollaborate import BlackboardCollaborateIE
-from .bleacherreport import (
- BleacherReportIE,
- BleacherReportCMSIE,
-)
-from .blogger import BloggerIE
-from .bloomberg import BloombergIE
-from .bokecc import BokeCCIE
-from .bongacams import BongaCamsIE
-from .bostonglobe import BostonGlobeIE
-from .box import BoxIE
-from .bpb import BpbIE
-from .br import (
- BRIE,
- BRMediathekIE,
-)
-from .bravotv import BravoTVIE
-from .breakcom import BreakIE
-from .breitbart import BreitBartIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .businessinsider import BusinessInsiderIE
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .cableav import CableAVIE
-from .callin import CallinIE
-from .caltrans import CaltransIE
-from .cam4 import CAM4IE
-from .camdemy import (
- CamdemyIE,
- CamdemyFolderIE
-)
-from .cammodels import CamModelsIE
-from .camwithher import CamWithHerIE
-from .canalalpha import CanalAlphaIE
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import (
- CanvasIE,
- CanvasEenIE,
- VrtNUIE,
- DagelijkseKostIE,
-)
-from .carambatv import (
- CarambaTVIE,
- CarambaTVPageIE,
-)
-from .cartoonnetwork import CartoonNetworkIE
-from .cbc import (
- CBCIE,
- CBCPlayerIE,
- CBCGemIE,
- CBCGemPlaylistIE,
- CBCGemLiveIE,
-)
-from .cbs import CBSIE
-from .cbslocal import (
- CBSLocalIE,
- CBSLocalArticleIE,
-)
-from .cbsinteractive import CBSInteractiveIE
-from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSNewsLiveVideoIE,
-)
-from .cbssports import (
- CBSSportsEmbedIE,
- CBSSportsIE,
- TwentyFourSevenSportsIE,
-)
-from .ccc import (
- CCCIE,
- CCCPlaylistIE,
-)
-from .ccma import CCMAIE
-from .cctv import CCTVIE
-from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
-from .cgtn import CGTNIE
-from .channel9 import Channel9IE
-from .charlierose import CharlieRoseIE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chingari import (
- ChingariIE,
- ChingariUserIE,
-)
-from .chirbit import (
- ChirbitIE,
- ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemax import CinemaxIE
-from .ciscolive import (
- CiscoLiveSessionIE,
- CiscoLiveSearchIE,
-)
-from .ciscowebex import CiscoWebexIE
-from .cjsw import CJSWIE
-from .cliphunter import CliphunterIE
-from .clippit import ClippitIE
-from .cliprs import ClipRsIE
-from .clipsyndicate import ClipsyndicateIE
-from .closertotruth import CloserToTruthIE
-from .cloudflarestream import CloudflareStreamIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnbc import (
- CNBCIE,
- CNBCVideoIE,
-)
-from .cnn import (
- CNNIE,
- CNNBlogsIE,
- CNNArticleIE,
-)
-from .coub import CoubIE
-from .comedycentral import (
- ComedyCentralIE,
- ComedyCentralTVIE,
-)
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import (
- MmsIE,
- RtmpIE,
- ViewSourceIE,
-)
-from .condenast import CondeNastIE
-from .contv import CONtvIE
-from .corus import CorusIE
-from .cpac import (
- CPACIE,
- CPACPlaylistIE,
-)
-from .cozytv import CozyTVIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .craftsy import CraftsyIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crowdbunker import (
- CrowdBunkerIE,
- CrowdBunkerChannelIE,
-)
-from .crunchyroll import (
- CrunchyrollIE,
- CrunchyrollShowPlaylistIE,
- CrunchyrollBetaIE,
- CrunchyrollBetaShowIE,
-)
-from .cspan import CSpanIE, CSpanCongressIE
-from .ctsnews import CtsNewsIE
-from .ctv import CTVIE
-from .ctvnews import CTVNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .curiositystream import (
- CuriosityStreamIE,
- CuriosityStreamCollectionsIE,
- CuriosityStreamSeriesIE,
-)
-from .cwtv import CWTVIE
-from .cybrary import (
- CybraryIE,
- CybraryCourseIE
-)
-from .daftsex import DaftsexIE
-from .dailymail import DailyMailIE
-from .dailymotion import (
- DailymotionIE,
- DailymotionPlaylistIE,
- DailymotionUserIE,
-)
-from .damtomo import (
- DamtomoRecordIE,
- DamtomoVideoIE,
-)
-from .daum import (
- DaumIE,
- DaumClipIE,
- DaumPlaylistIE,
- DaumUserIE,
-)
-from .daystar import DaystarClipIE
-from .dbtv import DBTVIE
-from .dctp import DctpTvIE
-from .deezer import (
- DeezerPlaylistIE,
- DeezerAlbumIE,
-)
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .digg import DiggIE
-from .dotsub import DotsubIE
-from .douyutv import (
- DouyuShowIE,
- DouyuTVIE,
-)
-from .dplay import (
- DPlayIE,
- DiscoveryPlusIE,
- HGTVDeIE,
- GoDiscoveryIE,
- TravelChannelIE,
- CookingChannelIE,
- HGTVUsaIE,
- FoodNetworkIE,
- InvestigationDiscoveryIE,
- DestinationAmericaIE,
- AmHistoryChannelIE,
- ScienceChannelIE,
- DIYNetworkIE,
- DiscoveryLifeIE,
- AnimalPlanetIE,
- TLCIE,
- DiscoveryPlusIndiaIE,
- DiscoveryNetworksDeIE,
- DiscoveryPlusItalyIE,
- DiscoveryPlusItalyShowIE,
- DiscoveryPlusIndiaShowIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import (
- DRTVIE,
- DRTVLiveIE,
-)
-from .dtube import DTubeIE
-from .dvtv import DVTVIE
-from .duboku import (
- DubokuIE,
- DubokuPlaylistIE
-)
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .digitalconcerthall import DigitalConcertHallIE
-from .discovery import DiscoveryIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .doodstream import DoodStreamIE
-from .dropbox import DropboxIE
-from .dropout import (
- DropoutSeasonIE,
- DropoutIE
-)
-from .dw import (
- DWIE,
- DWArticleIE,
-)
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .egghead import (
- EggheadCourseIE,
- EggheadLessonIE,
-)
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentube import (
- EllenTubeIE,
- EllenTubeVideoIE,
- EllenTubePlaylistIE,
-)
-from .elonet import ElonetIE
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .epicon import (
- EpiconIE,
- EpiconSeriesIE,
-)
-from .eporner import EpornerIE
-from .eroprofile import (
- EroProfileIE,
- EroProfileAlbumIE,
-)
-from .ertgr import (
- ERTFlixCodenameIE,
- ERTFlixIE,
- ERTWebtvEmbedIE,
-)
-from .escapist import EscapistIE
-from .espn import (
- ESPNIE,
- ESPNArticleIE,
- FiveThirtyEightIE,
- ESPNCricInfoIE,
-)
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .europeantour import EuropeanTourIE
-from .euscreen import EUScreenIE
-from .expotv import ExpoTVIE
-from .expressen import ExpressenIE
-from .extremetube import ExtremeTubeIE
-from .eyedotv import EyedoTVIE
-from .facebook import (
- FacebookIE,
- FacebookPluginsVideoIE,
- FacebookRedirectURLIE,
-)
-from .fancode import (
- FancodeVodIE,
- FancodeLiveIE
-)
+from ..utils import load_plugins
-from .faz import FazIE
-from .fc2 import (
- FC2IE,
- FC2EmbedIE,
- FC2LiveIE,
-)
-from .fczenit import FczenitIE
-from .fifa import FifaIE
-from .filmmodu import FilmmoduIE
-from .filmon import (
- FilmOnIE,
- FilmOnChannelIE,
-)
-from .filmweb import FilmwebIE
-from .firsttv import FirstTVIE
-from .fivetv import FiveTVIE
-from .flickr import FlickrIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .formula1 import Formula1IE
-from .fourtube import (
- FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
- FuxIE,
-)
-from .fox import FOXIE
-from .fox9 import (
- FOX9IE,
- FOX9NewsIE,
-)
-from .foxgay import FoxgayIE
-from .foxnews import (
- FoxNewsIE,
- FoxNewsArticleIE,
-)
-from .foxsports import FoxSportsIE
-from .fptplay import FptplayIE
-from .franceculture import FranceCultureIE
-from .franceinter import FranceInterIE
-from .francetv import (
- FranceTVIE,
- FranceTVSiteIE,
- FranceTVInfoIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
-from .fujitv import FujiTVFODPlus7IE
-from .funimation import (
- FunimationIE,
- FunimationPageIE,
- FunimationShowIE,
-)
-from .funk import FunkIE
-from .fusion import FusionIE
-from .gab import (
- GabTVIE,
- GabIE,
-)
-from .gaia import GaiaIE
-from .gameinformer import GameInformerIE
-from .gamejolt import (
- GameJoltIE,
- GameJoltUserIE,
- GameJoltGameIE,
- GameJoltGameSoundtrackIE,
- GameJoltCommunityIE,
- GameJoltSearchIE,
-)
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gaskrank import GaskrankIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .gedidigital import GediDigitalIE
-from .generic import GenericIE
-from .gettr import (
- GettrIE,
- GettrStreamingIE,
-)
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
- GloboIE,
- GloboArticleIE,
-)
-from .go import GoIE
-from .godtube import GodTubeIE
-from .gofile import GofileIE
-from .golem import GolemIE
-from .goodgame import GoodGameIE
-from .googledrive import GoogleDriveIE
-from .googlepodcasts import (
- GooglePodcastsIE,
- GooglePodcastsFeedIE,
-)
-from .googlesearch import GoogleSearchIE
-from .gopro import GoProIE
-from .goshgay import GoshgayIE
-from .gotostage import GoToStageIE
-from .gputechconf import GPUTechConfIE
-from .gronkh import (
- GronkhIE,
- GronkhFeedIE,
- GronkhVodsIE
-)
-from .groupon import GrouponIE
-from .hbo import HBOIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
-from .hidive import HiDiveIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hitrecord import HitRecordIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import (
- HotStarIE,
- HotStarPrefixIE,
- HotStarPlaylistIE,
- HotStarSeriesIE,
-)
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .hrfensehen import HRFernsehenIE
-from .hrti import (
- HRTiIE,
- HRTiPlaylistIE,
-)
-from .hse import (
- HSEShowIE,
- HSEProductIE,
-)
-from .huajiao import HuajiaoIE
-from .huya import HuyaLiveIE
-from .huffpost import HuffPostIE
-from .hungama import (
- HungamaIE,
- HungamaSongIE,
- HungamaAlbumPlaylistIE,
-)
-from .hypem import HypemIE
-from .icareus import IcareusIE
-from .ichinanalive import (
- IchinanaLiveIE,
- IchinanaLiveClipIE,
-)
-from .ign import (
- IGNIE,
- IGNVideoIE,
- IGNArticleIE,
-)
-from .iheart import (
- IHeartRadioIE,
- IHeartRadioPodcastIE,
-)
-from .imdb import (
- ImdbIE,
- ImdbListIE
-)
-from .imgur import (
- ImgurIE,
- ImgurAlbumIE,
- ImgurGalleryIE,
-)
-from .ina import InaIE
-from .inc import IncIE
-from .indavideo import IndavideoEmbedIE
-from .infoq import InfoQIE
-from .instagram import (
- InstagramIE,
- InstagramIOSIE,
- InstagramUserIE,
- InstagramTagIE,
- InstagramStoryIE,
-)
-from .internazionale import InternazionaleIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import (
- IPrimaIE,
- IPrimaCNNIE
-)
-from .iqiyi import (
- IqiyiIE,
- IqIE,
- IqAlbumIE
-)
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ with contextlib.suppress(ImportError):
+ from .lazy_extractors import * # noqa: F403
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
-from .itprotv import (
- ITProTVIE,
- ITProTVCourseIE
-)
+if not _LAZY_LOADER:
+ from ._extractors import * # noqa: F403
+ _ALL_CLASSES = [ # noqa: F811
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ _ALL_CLASSES.append(GenericIE) # noqa: F405
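
With the import list gone, extractors.py only decides where the classes come from: the pre-generated lazy_extractors stubs when available (unless YTDLP_NO_LAZY_EXTRACTORS is set), otherwise the full _extractors module, with GenericIE appended last so that every specific extractor is consulted before the generic fallback. Roughly how that ordering is consumed downstream (illustrative):

    def first_suitable(url):
        for ie in _ALL_CLASSES:      # GenericIE is deliberately last
            if ie.suitable(url):
                return ie            # a specific extractor wins over the fallback
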
-from .itv import (
- ITVIE,
- ITVBTCCIE,
-)
-from .ivi import (
- IviIE,
- IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .iwara import (
- IwaraIE,
- IwaraPlaylistIE,
- IwaraUserIE,
-)
-from .izlesene import IzleseneIE
-from .jable import (
- JableIE,
- JablePlaylistIE,
-)
-from .jamendo import (
- JamendoIE,
- JamendoAlbumIE,
-)
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .joj import JojIE
-from .jwplatform import JWPlatformIE
-from .kakao import KakaoIE
-from .kaltura import KalturaIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .kelbyone import KelbyOneIE
-from .ketnet import KetnetIE
-from .khanacademy import (
- KhanAcademyIE,
- KhanAcademyUnitIE,
-)
-from .kickstarter import KickStarterIE
-from .kinja import KinjaEmbedIE
-from .kinopoisk import KinoPoiskIE
-from .konserthusetplay import KonserthusetPlayIE
-from .koo import KooIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kusi import KUSIIE
-from .kuwo import (
- KuwoIE,
- KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
- KuwoCategoryIE,
- KuwoMvIE,
-)
-from .la7 import (
- LA7IE,
- LA7PodcastEpisodeIE,
- LA7PodcastIE,
-)
-from .laola1tv import (
- Laola1TvEmbedIE,
- Laola1TvIE,
- EHFTVIE,
- ITTFIE,
-)
-from .lastfm import (
- LastFMIE,
- LastFMPlaylistIE,
- LastFMUserIE,
-)
-from .lbry import (
- LBRYIE,
- LBRYChannelIE,
-)
-from .lci import LCIIE
-from .lcp import (
- LcpPlayIE,
- LcpIE,
-)
-from .lecture2go import Lecture2GoIE
-from .lecturio import (
- LecturioIE,
- LecturioCourseIE,
- LecturioDeCourseIE,
-)
-from .leeco import (
- LeIE,
- LePlaylistIE,
- LetvCloudIE,
-)
-from .lego import LEGOIE
-from .lemonde import LemondeIE
-from .lenta import LentaIE
-from .libraryofcongress import LibraryOfCongressIE
-from .libsyn import LibsynIE
-from .lifenews import (
- LifeNewsIE,
- LifeEmbedIE,
-)
-from .likee import (
- LikeeIE,
- LikeeUserIE
-)
-from .limelight import (
- LimelightMediaIE,
- LimelightChannelIE,
- LimelightChannelListIE,
-)
-from .line import (
- LineLiveIE,
- LineLiveChannelIE,
-)
-from .linkedin import (
- LinkedInIE,
- LinkedInLearningIE,
- LinkedInLearningCourseIE,
-)
-from .linuxacademy import LinuxAcademyIE
-from .litv import LiTVIE
-from .livejournal import LiveJournalIE
-from .livestream import (
- LivestreamIE,
- LivestreamOriginalIE,
- LivestreamShortenerIE,
-)
-from .lnkgo import (
- LnkGoIE,
- LnkIE,
-)
-from .localnews8 import LocalNews8IE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import (
- LRTVODIE,
- LRTStreamIE
-)
-from .lynda import (
- LyndaIE,
- LyndaCourseIE
-)
-from .m6 import M6IE
-from .magentamusik360 import MagentaMusik360IE
-from .mailru import (
- MailRuIE,
- MailRuMusicIE,
- MailRuMusicSearchIE,
-)
-from .mainstreaming import MainStreamingIE
-from .malltv import MallTVIE
-from .mangomolo import (
- MangomoloVideoIE,
- MangomoloLiveIE,
-)
-from .manoto import (
- ManotoTVIE,
- ManotoTVShowIE,
- ManotoTVLiveIE,
-)
-from .manyvids import ManyVidsIE
-from .maoritv import MaoriTVIE
-from .markiza import (
- MarkizaIE,
- MarkizaPageIE,
-)
-from .massengeschmacktv import MassengeschmackTVIE
-from .masters import MastersIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .medaltv import MedalTVIE
-from .mediaite import MediaiteIE
-from .mediaklikk import MediaKlikkIE
-from .mediaset import (
- MediasetIE,
- MediasetShowIE,
-)
-from .mediasite import (
- MediasiteIE,
- MediasiteCatalogIE,
- MediasiteNamedCatalogIE,
-)
-from .medici import MediciIE
-from .megaphone import MegaphoneIE
-from .meipai import MeipaiIE
-from .melonvod import MelonVODIE
-from .meta import METAIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .mgtv import MGTVIE
-from .miaopai import MiaoPaiIE
-from .microsoftstream import MicrosoftStreamIE
-from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
- MicrosoftVirtualAcademyCourseIE,
-)
-from .mildom import (
- MildomIE,
- MildomVodIE,
- MildomClipIE,
- MildomUserVodIE,
-)
-from .minds import (
- MindsIE,
- MindsChannelIE,
- MindsGroupIE,
-)
-from .ministrygrid import MinistryGridIE
-from .minoto import MinotoIE
-from .miomio import MioMioIE
-from .mirrativ import (
- MirrativIE,
- MirrativUserIE,
-)
-from .mit import TechTVMITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixch import (
- MixchIE,
- MixchArchiveIE,
-)
-from .mixcloud import (
- MixcloudIE,
- MixcloudUserIE,
- MixcloudPlaylistIE,
-)
-from .mlb import (
- MLBIE,
- MLBVideoIE,
-)
-from .mlssoccer import MLSSoccerIE
-from .mnet import MnetIE
-from .moevideo import MoeVideoIE
-from .mofosex import (
- MofosexIE,
- MofosexEmbedIE,
-)
-from .mojvideo import MojvideoIE
-from .morningstar import MorningstarIE
-from .motherless import (
- MotherlessIE,
- MotherlessGroupIE
-)
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviepilot import MoviepilotIE
-from .moviezine import MoviezineIE
-from .movingimage import MovingImageIE
-from .msn import MSNIE
-from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
- MTVDEIE,
- MTVJapanIE,
- MTVItaliaIE,
- MTVItaliaProgrammaIE,
-)
-from .muenchentv import MuenchenTVIE
-from .murrtube import MurrtubeIE, MurrtubeUserIE
-from .musescore import MuseScoreIE
-from .musicdex import (
- MusicdexSongIE,
- MusicdexAlbumIE,
- MusicdexArtistIE,
- MusicdexPlaylistIE,
-)
-from .mwave import MwaveIE, MwaveMeetGreetIE
-from .mxplayer import (
- MxplayerIE,
- MxplayerShowIE,
-)
-from .mychannels import MyChannelsIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import (
- MyviIE,
- MyviEmbedIE,
-)
-from .myvideoge import MyVideoGeIE
-from .myvidster import MyVidsterIE
-from .n1 import (
- N1InfoAssetIE,
- N1InfoIIE,
-)
-from .nate import (
- NateIE,
- NateProgramIE,
-)
-from .nationalgeographic import (
- NationalGeographicVideoIE,
- NationalGeographicTVIE,
-)
-from .naver import (
- NaverIE,
- NaverLiveIE,
-)
-from .nba import (
- NBAWatchEmbedIE,
- NBAWatchIE,
- NBAWatchCollectionIE,
- NBAEmbedIE,
- NBAIE,
- NBAChannelIE,
-)
-from .nbc import (
- NBCIE,
- NBCNewsIE,
- NBCOlympicsIE,
- NBCOlympicsStreamIE,
- NBCSportsIE,
- NBCSportsStreamIE,
- NBCSportsVPlayerIE,
-)
-from .ndr import (
- NDRIE,
- NJoyIE,
- NDREmbedBaseIE,
- NDREmbedIE,
- NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .nebula import (
- NebulaIE,
- NebulaSubscriptionsIE,
- NebulaChannelIE,
-)
-from .nerdcubed import NerdCubedFeedIE
-from .netzkino import NetzkinoIE
-from .neteasemusic import (
- NetEaseMusicIE,
- NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
- NetEaseMusicListIE,
- NetEaseMusicMvIE,
- NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
-)
-from .newgrounds import (
- NewgroundsIE,
- NewgroundsPlaylistIE,
- NewgroundsUserIE,
-)
-from .newstube import NewstubeIE
-from .newsy import NewsyIE
-from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
- AppleDailyIE,
- NextTVIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nfb import NFBIE
-from .nfhsnetwork import NFHSNetworkIE
-from .nfl import (
- NFLIE,
- NFLArticleIE,
-)
-from .nhk import (
- NhkVodIE,
- NhkVodProgramIE,
- NhkForSchoolBangumiIE,
- NhkForSchoolSubjectIE,
- NhkForSchoolProgramListIE,
-)
-from .nhl import NHLIE
-from .nick import (
- NickIE,
- NickBrIE,
- NickDeIE,
- NickNightIE,
- NickRuIE,
-)
-from .niconico import (
- NiconicoIE,
- NiconicoPlaylistIE,
- NiconicoUserIE,
- NiconicoSeriesIE,
- NiconicoHistoryIE,
- NicovideoSearchDateIE,
- NicovideoSearchIE,
- NicovideoSearchURLIE,
- NicovideoTagURLIE,
-)
-from .ninecninemedia import (
- NineCNineMediaIE,
- CPTwentyFourIE,
-)
-from .ninegag import NineGagIE
-from .ninenow import NineNowIE
-from .nintendo import NintendoIE
-from .nitter import NitterIE
-from .njpwworld import NJPWWorldIE
-from .nobelprize import NobelPrizeIE
-from .nonktube import NonkTubeIE
-from .noodlemagazine import NoodleMagazineIE
-from .noovo import NoovoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import (
- NovaEmbedIE,
- NovaIE,
-)
-from .novaplay import NovaPlayIE
-from .nowness import (
- NownessIE,
- NownessPlaylistIE,
- NownessSeriesIE,
-)
-from .noz import NozIE
-from .npo import (
- AndereTijdenIE,
- NPOIE,
- NPOLiveIE,
- NPORadioIE,
- NPORadioFragmentIE,
- SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
-)
-from .npr import NprIE
-from .nrk import (
- NRKIE,
- NRKPlaylistIE,
- NRKSkoleIE,
- NRKTVIE,
- NRKTVDirekteIE,
- NRKRadioPodkastIE,
- NRKTVEpisodeIE,
- NRKTVEpisodesIE,
- NRKTVSeasonIE,
- NRKTVSeriesIE,
-)
-from .nrl import NRLTVIE
-from .ntvcojp import NTVCoJpCUIE
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
- NYTimesCookingIE,
-)
-from .nuvid import NuvidIE
-from .nzherald import NZHeraldIE
-from .nzz import NZZIE
-from .odatv import OdaTVIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .olympics import OlympicsReplayIE
-from .on24 import On24IE
-from .ondemandkorea import OnDemandKoreaIE
-from .onefootball import OneFootballIE
-from .onet import (
- OnetIE,
- OnetChannelIE,
- OnetMVPIE,
- OnetPlIE,
-)
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
- OoyalaIE,
- OoyalaExternalIE,
-)
-from .opencast import (
- OpencastIE,
- OpencastPlaylistIE,
-)
-from .openrec import (
- OpenRecIE,
- OpenRecCaptureIE,
- OpenRecMovieIE,
-)
-from .ora import OraTVIE
-from .orf import (
- ORFTVthekIE,
- ORFFM4IE,
- ORFFM4StoryIE,
- ORFOE1IE,
- ORFOE3IE,
- ORFNOEIE,
- ORFWIEIE,
- ORFBGLIE,
- ORFOOEIE,
- ORFSTMIE,
- ORFKTNIE,
- ORFSBGIE,
- ORFTIRIE,
- ORFVBGIE,
- ORFIPTVIE,
-)
-from .outsidetv import OutsideTVIE
-from .packtpub import (
- PacktPubIE,
- PacktPubCourseIE,
-)
-from .palcomp3 import (
- PalcoMP3IE,
- PalcoMP3ArtistIE,
- PalcoMP3VideoIE,
-)
-from .pandoratv import PandoraTVIE
-from .panopto import (
- PanoptoIE,
- PanoptoListIE,
- PanoptoPlaylistIE
-)
-from .paramountplus import (
- ParamountPlusIE,
- ParamountPlusSeriesIE,
-)
-from .parliamentliveuk import ParliamentLiveUKIE
-from .parlview import ParlviewIE
-from .patreon import (
- PatreonIE,
- PatreonUserIE
-)
-from .pbs import PBSIE
-from .pearvideo import PearVideoIE
-from .peekvids import PeekVidsIE, PlayVidsIE
-from .peertube import (
- PeerTubeIE,
- PeerTubePlaylistIE,
-)
-from .peertv import PeerTVIE
-from .peloton import (
- PelotonIE,
- PelotonLiveIE
-)
-from .people import PeopleIE
-from .performgroup import PerformGroupIE
-from .periscope import (
- PeriscopeIE,
- PeriscopeUserIE,
-)
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .piapro import PiaproIE
-from .picarto import (
- PicartoIE,
- PicartoVodIE,
-)
-from .piksel import PikselIE
-from .pinkbike import PinkbikeIE
-from .pinterest import (
- PinterestIE,
- PinterestCollectionIE,
-)
-from .pixivsketch import (
- PixivSketchIE,
- PixivSketchUserIE,
-)
-from .pladform import PladformIE
-from .planetmarathi import PlanetMarathiIE
-from .platzi import (
- PlatziIE,
- PlatziCourseIE,
-)
-from .playfm import PlayFMIE
-from .playplustv import PlayPlusTVIE
-from .plays import PlaysTVIE
-from .playstuff import PlayStuffIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
-from .pluralsight import (
- PluralsightIE,
- PluralsightCourseIE,
-)
-from .podchaser import PodchaserIE
-from .podomatic import PodomaticIE
-from .pokemon import (
- PokemonIE,
- PokemonWatchIE,
- PokemonSoundLibraryIE,
-)
-from .pokergo import (
- PokerGoIE,
- PokerGoCollectionIE,
-)
-from .polsatgo import PolsatGoIE
-from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioCategoryIE,
- PolskieRadioPlayerIE,
- PolskieRadioPodcastIE,
- PolskieRadioPodcastListIE,
- PolskieRadioRadioKierowcowIE,
-)
-from .popcorntimes import PopcorntimesIE
-from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
-from .porncom import PornComIE
-from .pornflip import PornFlipIE
-from .pornhd import PornHdIE
-from .pornhub import (
- PornHubIE,
- PornHubUserIE,
- PornHubPlaylistIE,
- PornHubPagedVideoListIE,
- PornHubUserVideosUploadIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .pornez import PornezIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
-from .presstv import PressTVIE
-from .projectveritas import ProjectVeritasIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .prx import (
- PRXStoryIE,
- PRXSeriesIE,
- PRXAccountIE,
- PRXStoriesSearchIE,
- PRXSeriesSearchIE
-)
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
- QQMusicIE,
- QQMusicSingerIE,
- QQMusicAlbumIE,
- QQMusicToplistIE,
- QQMusicPlaylistIE,
-)
-from .r7 import (
- R7IE,
- R7ArticleIE,
-)
-from .radiko import RadikoIE, RadikoRadioIE
-from .radiocanada import (
- RadioCanadaIE,
- RadioCanadaAudioVideoIE,
-)
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .radiozet import RadioZetPodcastIE
-from .radiokapital import (
- RadioKapitalIE,
- RadioKapitalShowIE,
-)
-from .radlive import (
- RadLiveIE,
- RadLiveChannelIE,
- RadLiveSeasonIE,
-)
-from .rai import (
- RaiPlayIE,
- RaiPlayLiveIE,
- RaiPlayPlaylistIE,
- RaiPlaySoundIE,
- RaiPlaySoundLiveIE,
- RaiPlaySoundPlaylistIE,
- RaiIE,
-)
-from .raywenderlich import (
- RayWenderlichIE,
- RayWenderlichCourseIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rcs import (
- RCSIE,
- RCSEmbedsIE,
- RCSVariousIE,
-)
-from .rcti import (
- RCTIPlusIE,
- RCTIPlusSeriesIE,
- RCTIPlusTVIE,
-)
-from .rds import RDSIE
-from .redbulltv import (
- RedBullTVIE,
- RedBullEmbedIE,
- RedBullTVRrnContentIE,
- RedBullIE,
-)
-from .reddit import RedditIE
-from .redgifs import (
- RedGifsIE,
- RedGifsSearchIE,
- RedGifsUserIE,
-)
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .rentv import (
- RENTVIE,
- RENTVArticleIE,
-)
-from .restudy import RestudyIE
-from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
-from .rice import RICEIE
-from .rmcdecouverte import RMCDecouverteIE
-from .rockstargames import RockstarGamesIE
-from .rokfin import (
- RokfinIE,
- RokfinStackIE,
- RokfinChannelIE,
- RokfinSearchIE,
-)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
-from .rottentomatoes import RottenTomatoesIE
-from .rozhlas import RozhlasIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import (
- RTL2IE,
- RTL2YouIE,
- RTL2YouSeriesIE,
-)
-from .rtnews import (
- RTNewsIE,
- RTDocumentryIE,
- RTDocumentryPlaylistIE,
- RuptlyIE,
-)
-from .rtp import RTPIE
-from .rtrfm import RTRFMIE
-from .rts import RTSIE
-from .rtve import (
- RTVEALaCartaIE,
- RTVEAudioIE,
- RTVELiveIE,
- RTVEInfantilIE,
- RTVETelevisionIE,
-)
-from .rtvnh import RTVNHIE
-from .rtvs import RTVSIE
-from .ruhd import RUHDIE
-from .rule34video import Rule34VideoIE
-from .rumble import (
- RumbleEmbedIE,
- RumbleChannelIE,
-)
-from .rutube import (
- RutubeIE,
- RutubeChannelIE,
- RutubeEmbedIE,
- RutubeMovieIE,
- RutubePersonIE,
- RutubePlaylistIE,
- RutubeTagsIE,
-)
-from .glomex import (
- GlomexIE,
- GlomexEmbedIE,
-)
-from .megatvcom import (
- MegaTVComIE,
- MegaTVComEmbedIE,
-)
-from .ant1newsgr import (
- Ant1NewsGrWatchIE,
- Ant1NewsGrArticleIE,
- Ant1NewsGrEmbedIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .ruv import (
- RuvIE,
- RuvSpilaIE
-)
-from .safari import (
- SafariIE,
- SafariApiIE,
- SafariCourseIE,
-)
-from .saitosan import SaitosanIE
-from .samplefocus import SampleFocusIE
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import (
- ScrippsNetworksWatchIE,
- ScrippsNetworksIE,
-)
-from .scte import (
- SCTEIE,
- SCTECourseIE,
-)
-from .seeker import SeekerIE
-from .senategov import SenateISVPIE, SenateGovIE
-from .sendtonews import SendtoNewsIE
-from .servus import ServusIE
-from .sevenplus import SevenPlusIE
-from .sexu import SexuIE
-from .seznamzpravy import (
- SeznamZpravyIE,
- SeznamZpravyArticleIE,
-)
-from .shahid import (
- ShahidIE,
- ShahidShowIE,
-)
-from .shared import (
- SharedIE,
- VivoIE,
-)
-from .shemaroome import ShemarooMeIE
-from .showroomlive import ShowRoomLiveIE
-from .simplecast import (
- SimplecastIE,
- SimplecastEpisodeIE,
- SimplecastPodcastIE,
-)
-from .sina import SinaIE
-from .sixplay import SixPlayIE
-from .skeb import SkebIE
-from .skyit import (
- SkyItPlayerIE,
- SkyItVideoIE,
- SkyItVideoLiveIE,
- SkyItIE,
- SkyItAcademyIE,
- SkyItArteIE,
- CieloTVItIE,
- TV8ItIE,
-)
-from .skylinewebcams import SkylineWebcamsIE
-from .skynewsarabia import (
- SkyNewsArabiaIE,
- SkyNewsArabiaArticleIE,
-)
-from .skynewsau import SkyNewsAUIE
-from .sky import (
- SkyNewsIE,
- SkyNewsStoryIE,
- SkySportsIE,
- SkySportsNewsIE,
-)
-from .slideshare import SlideshareIE
-from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .sonyliv import (
- SonyLIVIE,
- SonyLIVSeriesIE,
-)
-from .soundcloud import (
- SoundcloudEmbedIE,
- SoundcloudIE,
- SoundcloudSetIE,
- SoundcloudRelatedIE,
- SoundcloudUserIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
-)
-from .soundgasm import (
- SoundgasmIE,
- SoundgasmProfileIE
-)
-from .southpark import (
- SouthParkIE,
- SouthParkDeIE,
- SouthParkDkIE,
- SouthParkEsIE,
- SouthParkNlIE
-)
-from .sovietscloset import (
- SovietsClosetIE,
- SovietsClosetPlaylistIE
-)
-from .spankbang import (
- SpankBangIE,
- SpankBangPlaylistIE,
-)
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
-from .spike import (
- BellatorIE,
- ParamountNetworkIE,
-)
-from .stitcher import (
- StitcherIE,
- StitcherShowIE,
-)
-from .sport5 import Sport5IE
-from .sportbox import SportBoxIE
-from .sportdeutschland import SportDeutschlandIE
-from .spotify import (
- SpotifyIE,
- SpotifyShowIE,
-)
-from .spreaker import (
- SpreakerIE,
- SpreakerPageIE,
- SpreakerShowIE,
- SpreakerShowPageIE,
-)
-from .springboardplatform import SpringboardPlatformIE
-from .sprout import SproutIE
-from .srgssr import (
- SRGSSRIE,
- SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .startv import StarTVIE
-from .steam import SteamIE
-from .storyfire import (
- StoryFireIE,
- StoryFireUserIE,
- StoryFireSeriesIE,
-)
-from .streamable import StreamableIE
-from .streamanity import StreamanityIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streamff import StreamFFIE
-from .streetvoice import StreetVoiceIE
-from .stretchinternet import StretchInternetIE
-from .stripchat import StripchatIE
-from .stv import STVPlayerIE
-from .sunporno import SunPornoIE
-from .sverigesradio import (
- SverigesRadioEpisodeIE,
- SverigesRadioPublicationIE,
-)
-from .svt import (
- SVTIE,
- SVTPageIE,
- SVTPlayIE,
- SVTSeriesIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tass import TassIE
-from .tbs import TBSIE
-from .tdslifeway import TDSLifewayIE
-from .teachable import (
- TeachableIE,
- TeachableCourseIE,
-)
-from .teachertube import (
- TeacherTubeIE,
- TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
-from .ted import (
- TedEmbedIE,
- TedPlaylistIE,
- TedSeriesIE,
- TedTalkIE,
-)
-from .tele5 import Tele5IE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telegram import TelegramEmbedIE
-from .telemb import TeleMBIE
-from .telemundo import TelemundoIE
-from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
- TeleQuebecEmissionIE,
- TeleQuebecLiveIE,
- TeleQuebecVideoIE,
-)
-from .teletask import TeleTaskIE
-from .telewebion import TelewebionIE
-from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .tfo import TFOIE
-from .theintercept import TheInterceptIE
-from .theplatform import (
- ThePlatformIE,
- ThePlatformFeedIE,
-)
-from .thestar import TheStarIE
-from .thesun import TheSunIE
-from .theta import (
- ThetaVideoIE,
- ThetaStreamIE,
-)
-from .theweatherchannel import TheWeatherChannelIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .thisoldhouse import ThisOldHouseIE
-from .threespeak import (
- ThreeSpeakIE,
- ThreeSpeakUserIE,
-)
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import (
- TikTokIE,
- TikTokUserIE,
- TikTokSoundIE,
- TikTokEffectIE,
- TikTokTagIE,
- TikTokVMIE,
- DouyinIE,
-)
-from .tinypic import TinyPicIE
-from .tmz import TMZIE
-from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
- EMPFlixIE,
- MovieFapIE,
-)
-from .toggle import (
- ToggleIE,
- MeWatchIE,
-)
-from .toggo import (
- ToggoIE,
-)
-from .tokentube import (
- TokentubeIE,
- TokentubeChannelIE
-)
-from .tonline import TOnlineIE
-from .toongoggles import ToonGogglesIE
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trovo import (
- TrovoIE,
- TrovoVodIE,
- TrovoChannelVodIE,
- TrovoChannelClipIE,
-)
-from .trueid import TrueIDIE
-from .trunews import TruNewsIE
-from .trutv import TruTVIE
-from .tube8 import Tube8IE
-from .tubitv import (
- TubiTvIE,
- TubiTvShowIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
- TuneInClipIE,
- TuneInStationIE,
- TuneInProgramIE,
- TuneInTopicIE,
- TuneInShortenerIE,
-)
-from .tunepk import TunePkIE
-from .turbo import TurboIE
-from .tv2 import (
- TV2IE,
- TV2ArticleIE,
- KatsomoIE,
- MTVUutisetArticleIE,
-)
-from .tv2dk import (
- TV2DKIE,
- TV2DKBornholmPlayIE,
-)
-from .tv2hu import (
- TV2HuIE,
- TV2HuSeriesIE,
-)
-from .tv4 import TV4IE
-from .tv5mondeplus import TV5MondePlusIE
-from .tv5unis import (
- TV5UnisVideoIE,
- TV5UnisIE,
-)
-from .tva import (
- TVAIE,
- QubIE,
-)
-from .tvanouvelles import (
- TVANouvellesIE,
- TVANouvellesArticleIE,
-)
-from .tvc import (
- TVCIE,
- TVCArticleIE,
-)
-from .tver import TVerIE
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
-from .tvnoe import TVNoeIE
-from .tvnow import (
- TVNowIE,
- TVNowFilmIE,
- TVNowNewIE,
- TVNowSeasonIE,
- TVNowAnnualIE,
- TVNowShowIE,
-)
-from .tvopengr import (
- TVOpenGrWatchIE,
- TVOpenGrEmbedIE,
-)
-from .tvp import (
- TVPEmbedIE,
- TVPIE,
- TVPStreamIE,
- TVPWebsiteIE,
-)
-from .tvplay import (
- TVPlayIE,
- ViafreeIE,
- TVPlayHomeIE,
-)
-from .tvplayer import TVPlayerIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import (
- TwitCastingIE,
- TwitCastingLiveIE,
- TwitCastingUserIE,
-)
-from .twitch import (
- TwitchVodIE,
- TwitchCollectionIE,
- TwitchVideosIE,
- TwitchVideosClipsIE,
- TwitchVideosCollectionsIE,
- TwitchStreamIE,
- TwitchClipsIE,
-)
-from .twitter import (
- TwitterCardIE,
- TwitterIE,
- TwitterAmplifyIE,
- TwitterBroadcastIE,
- TwitterShortenerIE,
-)
-from .udemy import (
- UdemyIE,
- UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .ufctv import (
- UFCTVIE,
- UFCArabiaIE,
-)
-from .ukcolumn import UkColumnIE
-from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .drooble import DroobleIE
-from .umg import UMGDeIE
-from .unistra import UnistraIE
-from .unity import UnityIE
-from .uol import UOLIE
-from .uplynk import (
- UplynkIE,
- UplynkPreplayIE,
-)
-from .urort import UrortIE
-from .urplay import URPlayIE
-from .usanetwork import USANetworkIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import (
- UstudioIE,
- UstudioEmbedIE,
-)
-from .utreon import UtreonIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veo import VeoIE
-from .veoh import VeohIE
-from .vesti import VestiIE
-from .vevo import (
- VevoIE,
- VevoPlaylistIE,
-)
-from .vgtv import (
- BTArticleIE,
- BTVestlendingenIE,
- VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
- ViceIE,
- ViceArticleIE,
- ViceShowIE,
-)
-from .vidbit import VidbitIE
-from .viddler import ViddlerIE
-from .videa import VideaIE
-from .videocampus_sachsen import VideocampusSachsenIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomore import (
- VideomoreIE,
- VideomoreVideoIE,
- VideomoreSeasonIE,
-)
-from .videopress import VideoPressIE
-from .vidio import (
- VidioIE,
- VidioPremierIE,
- VidioLiveIE
-)
-from .vidlii import VidLiiIE
-from .vier import VierIE, VierVideosIE
-from .viewlift import (
- ViewLiftIE,
- ViewLiftEmbedIE,
-)
-from .viidea import ViideaIE
-from .vimeo import (
- VimeoIE,
- VimeoAlbumIE,
- VimeoChannelIE,
- VimeoGroupsIE,
- VimeoLikesIE,
- VimeoOndemandIE,
- VimeoReviewIE,
- VimeoUserIE,
- VimeoWatchLaterIE,
- VHXEmbedIE,
-)
-from .vimm import (
- VimmIE,
- VimmRecordingIE,
-)
-from .vimple import VimpleIE
-from .vine import (
- VineIE,
- VineUserIE,
-)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
-from .viqeo import ViqeoIE
-from .viu import (
- ViuIE,
- ViuPlaylistIE,
- ViuOTTIE,
-)
-from .vk import (
- VKIE,
- VKUserVideosIE,
- VKWallPostIE,
-)
-from .vlive import (
- VLiveIE,
- VLivePostIE,
- VLiveChannelIE,
-)
-from .vodlocker import VodlockerIE
-from .vodpl import VODPlIE
-from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
-from .voicy import (
- VoicyIE,
- VoicyChannelIE,
-)
-from .voot import (
- VootIE,
- VootSeriesIE,
-)
-from .voxmedia import (
- VoxMediaVolumeIE,
- VoxMediaIE,
-)
-from .vrt import VRTIE
-from .vrak import VrakIE
-from .vrv import (
- VRVIE,
- VRVSeriesIE,
-)
-from .vshare import VShareIE
-from .vtm import VTMIE
-from .medialaan import MedialaanIE
-from .vuclip import VuClipIE
-from .vupload import VuploadIE
-from .vvvvid import (
- VVVVIDIE,
- VVVVIDShowIE,
-)
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
-from .walla import WallaIE
-from .washingtonpost import (
- WashingtonPostIE,
- WashingtonPostArticleIE,
-)
-from .wasdtv import (
- WASDTVStreamIE,
- WASDTVRecordIE,
- WASDTVClipIE,
-)
-from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
-from .wdr import (
- WDRIE,
- WDRPageIE,
- WDRElefantIE,
- WDRMobileIE,
-)
-from .webcaster import (
- WebcasterIE,
- WebcasterFeedIE,
-)
-from .webofstories import (
- WebOfStoriesIE,
- WebOfStoriesPlaylistIE,
-)
-from .weibo import (
- WeiboIE,
- WeiboMobileIE
-)
-from .weiqitv import WeiqiTVIE
-from .willow import WillowIE
-from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
-from .wistia import (
- WistiaIE,
- WistiaPlaylistIE,
-)
-from .worldstarhiphop import WorldStarHipHopIE
-from .wppilot import (
- WPPilotIE,
- WPPilotChannelsIE,
-)
-from .wsj import (
- WSJIE,
- WSJArticleIE,
-)
-from .wwe import WWEIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
- XHamsterIE,
- XHamsterEmbedIE,
- XHamsterUserIE,
-)
-from .xiami import (
- XiamiSongIE,
- XiamiAlbumIE,
- XiamiArtistIE,
- XiamiCollectionIE
-)
-from .ximalaya import (
- XimalayaIE,
- XimalayaAlbumIE
-)
-from .xinpianchang import XinpianchangIE
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
- YahooIE,
- YahooSearchIE,
- YahooGyaOPlayerIE,
- YahooGyaOIE,
- YahooJapanNewsIE,
-)
-from .yandexdisk import YandexDiskIE
-from .yandexmusic import (
- YandexMusicTrackIE,
- YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
- YandexMusicArtistTracksIE,
- YandexMusicArtistAlbumsIE,
-)
-from .yandexvideo import (
- YandexVideoIE,
- YandexVideoPreviewIE,
- ZenYandexIE,
- ZenYandexChannelIE,
-)
-from .yapfiles import YapFilesIE
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import (
- YoukuIE,
- YoukuShowIE,
-)
-from .younow import (
- YouNowLiveIE,
- YouNowChannelIE,
- YouNowMomentIE,
-)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
- YoutubeIE,
- YoutubeClipIE,
- YoutubeFavouritesIE,
- YoutubeNotificationsIE,
- YoutubeHistoryIE,
- YoutubeTabIE,
- YoutubeLivestreamEmbedIE,
- YoutubePlaylistIE,
- YoutubeRecommendedIE,
- YoutubeSearchDateIE,
- YoutubeSearchIE,
- YoutubeSearchURLIE,
- YoutubeMusicSearchURLIE,
- YoutubeSubscriptionsIE,
- YoutubeStoriesIE,
- YoutubeTruncatedIDIE,
- YoutubeTruncatedURLIE,
- YoutubeYtBeIE,
- YoutubeYtUserIE,
- YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zattoo import (
- BBVTVIE,
- EinsUndEinsTVIE,
- EWETVIE,
- GlattvisionTVIE,
- MNetTVIE,
- NetPlusIE,
- OsnatelTVIE,
- QuantumTVIE,
- SaltTVIE,
- SAKTVIE,
- VTXTVIE,
- WalyTVIE,
- ZattooIE,
- ZattooLiveIE,
- ZattooMoviesIE,
- ZattooRecordingsIE,
-)
-from .zdf import ZDFIE, ZDFChannelIE
-from .zee5 import (
- Zee5IE,
- Zee5SeriesIE,
-)
-from .zhihu import ZhihuIE
-from .zingmp3 import (
- ZingMp3IE,
- ZingMp3AlbumIE,
- ZingMp3ChartHomeIE,
- ZingMp3WeekChartIE,
- ZingMp3ChartMusicVideoIE,
- ZingMp3UserIE,
-)
-from .zoom import ZoomIE
-from .zype import ZypeIE
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
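Side note on the new collection logic: suitability checks run in _ALL_CLASSES order, so GenericIE is excluded from the name scan and appended last as the fallback, and plugin classes are prepended so they take precedence over the bundled extractors. A self-contained toy sketch of that ordering contract (illustrative names, not yt-dlp API):

    class _ToyIE:
        _PREFIX = None

        @classmethod
        def suitable(cls, url):
            # The catch-all (no prefix) accepts every URL
            return cls._PREFIX is None or url.startswith(cls._PREFIX)

    class FooIE(_ToyIE):
        _PREFIX = 'https://foo.example/'

    class GenericIE(_ToyIE):
        _PREFIX = None

    # Mirror of the globals() scan above: collect *IE names, keep the
    # catch-all out of the scan and append it last so specific ones win.
    _ALL = [klass for name, klass in list(globals().items())
            if name.endswith('IE') and name not in ('GenericIE', '_ToyIE')]
    _ALL.append(GenericIE)

    def first_suitable(url):
        return next(ie for ie in _ALL if ie.suitable(url))

    assert first_suitable('https://foo.example/clip') is FooIE
    assert first_suitable('https://elsewhere.example/') is GenericIE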
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index de45f9298..5b34f3bff 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -1,18 +1,18 @@
import json
import re
+import urllib.parse

from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
)
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
error_to_compat_str,
- ExtractorError,
float_or_none,
get_element_by_id,
get_first,
@@ -467,7 +467,7 @@ class FacebookIE(InfoExtractor):
dash_manifest = video.get('dash_manifest')
if dash_manifest:
formats.extend(self._parse_mpd_formats(
- compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))

def process_formats(formats):
# Downloads with browser's User-Agent are rate limited. Working around
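The dropped compat alias was a pass-through to the stdlib, so this hunk is behaviour-preserving; for the record, unquote_plus decodes '+' as a space on top of percent-escapes:

    import urllib.parse

    assert urllib.parse.unquote_plus('a+b%2Fc') == 'a b/c'
    assert urllib.parse.unquote('a+b%2Fc') == 'a+b/c'  # plain unquote keeps '+'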
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 225677b00..3501c4cf6 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -1,16 +1,13 @@
import re

from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
-)
+from ..compat import compat_parse_qs
from ..dependencies import websockets
from ..utils import (
ExtractorError,
WebSocketsWrapper,
js_to_json,
sanitized_Request,
- std_headers,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -81,7 +78,7 @@ class FC2IE(InfoExtractor):
webpage = None
if not url.startswith('fc2:'):
webpage = self._download_webpage(url, video_id)
- self._downloader.cookiejar.clear_session_cookies() # must clear
+ self.cookiejar.clear_session_cookies() # must clear
self._login()

title, thumbnail, description = None, None, None
@@ -207,10 +204,10 @@ class FC2LiveIE(InfoExtractor):
'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
'Origin': 'https://live.fc2.com',
'Accept': '*/*',
- 'User-Agent': std_headers['User-Agent'],
+ 'User-Agent': self.get_param('http_headers')['User-Agent'],
})

- self.write_debug('[debug] Sending HLS server request')
+ self.write_debug('Sending HLS server request')

while True:
recv = ws.recv()
@@ -232,13 +229,10 @@ class FC2LiveIE(InfoExtractor):
if not data or not isinstance(data, dict):
continue
if data.get('name') == '_response_' and data.get('id') == 1:
- self.write_debug('[debug] Goodbye.')
+ self.write_debug('Goodbye')
playlist_data = data
break
- elif self._downloader.params.get('verbose', False):
- if len(recv) > 100:
- recv = recv[:100] + '...'
- self.to_screen('[debug] Server said: %s' % recv)
+ self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))
if not playlist_data:
raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
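The rewritten messages rely on write_debug already adding the '[debug] ' prefix and honouring verbosity, which is why the hand-rolled verbose branch could go. The truncation folded into the last message is just:

    def ellipsize(s, limit=100):
        # First `limit` characters, with a marker when something was cut,
        # as in the 'Server said: ...' debug line above.
        return s[:limit] + ('...' if len(s) > limit else '')

    assert ellipsize('short') == 'short'
    assert ellipsize('x' * 120) == 'x' * 100 + '...'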
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
index 552ecd43a..9f60a6b1f 100644
--- a/yt_dlp/extractor/flickr.py
+++ b/yt_dlp/extractor/flickr.py
@@ -94,7 +94,7 @@ class FlickrIE(InfoExtractor):
owner = video_info.get('owner', {})
uploader_id = owner.get('nsid')
uploader_path = owner.get('path_alias') or uploader_id
- uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/')
+ uploader_url = format_field(uploader_path, None, 'https://www.flickr.com/photos/%s/')
return {
'id': video_id,
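For context on this one-line change: format_field gained a positional field argument, so the old template= keyword call had to become positional. A simplified sketch of the contract this call site assumes (the real yt_dlp.utils.format_field takes more parameters):

    def format_field(obj, field=None, template='%s', default=''):
        value = obj if field is None else obj.get(field)
        return default if value is None else template % value

    assert format_field('joe', None, 'https://www.flickr.com/photos/%s/') \
        == 'https://www.flickr.com/photos/joe/'
    assert format_field(None, None, 'https://www.flickr.com/photos/%s/') == ''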
diff --git a/yt_dlp/extractor/fourzerostudio.py b/yt_dlp/extractor/fourzerostudio.py
new file mode 100644
index 000000000..e1804e39e
--- /dev/null
+++ b/yt_dlp/extractor/fourzerostudio.py
@@ -0,0 +1,107 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, unified_timestamp
+
+
+class FourZeroStudioArchiveIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
+ IE_NAME = '0000studio:archive'
+ _TESTS = [{
+ 'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
+ 'info_dict': {
+ 'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
+ 'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
+ 'timestamp': 1653802534,
+ 'release_timestamp': 1653796604,
+ 'thumbnails': 'count:1',
+ 'comments': 'count:7',
+ 'uploader': '『中崎雄心』の執務室。',
+ 'uploader_id': 'mumeijiten',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+ pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False)
+ uploader_internal_id = traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': pcb.get('title'),
+ 'age_limit': 18 if pcb.get('isAdult') else None,
+ 'timestamp': unified_timestamp(pcb.get('finishTime')),
+ 'release_timestamp': unified_timestamp(pcb.get('createdAt')),
+ 'thumbnails': [{
+ 'url': pcb['thumbnailUrl'],
+ 'ext': 'png',
+ }] if pcb.get('thumbnailUrl') else None,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'comments': [{
+ 'author': c.get('username'),
+ 'author_id': c.get('postedUserId'),
+ 'author_thumbnail': c.get('userThumbnailUrl'),
+ 'id': c.get('id'),
+ 'text': c.get('body'),
+ 'timestamp': unified_timestamp(c.get('createdAt')),
+ 'like_count': c.get('likeCount'),
+ 'is_favorited': c.get('isLikedByOwner'),
+ 'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
+ } for c in traverse_obj(nuxt_data, (
+ 'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
+
+
+class FourZeroStudioClipIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
+ IE_NAME = '0000studio:clip'
+ _TESTS = [{
+ 'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'info_dict': {
+ 'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
+ 'timestamp': 1652109105,
+ 'like_count': 1,
+ 'uploader': 'ソエジマケイタ',
+ 'uploader_id': 'soeji',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+ clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False)
+
+ info = next((
+ m for m in self._parse_html5_media_entries(url, webpage, video_id)
+ if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
+ ), None)
+ if not info:
+ self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
+ info = {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': url,
+ } for url in clip_info.get('mediaFiles') or [] if url],
+ }
+ return {
+ **info,
+ 'id': video_id,
+ 'title': clip_info.get('clipComment'),
+ 'timestamp': unified_timestamp(clip_info.get('createdAt')),
+ 'like_count': clip_info.get('likeCount'),
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
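The ('ssrRefs', lambda _, v: ..., 'username') paths above use traverse_obj's branch-filter form with get_all=False to take the first matching ref. A plain-Python equivalent of that lookup, over toy NUXT data shaped like the code above expects:

    nuxt_data = {'ssrRefs': [
        {'__typename': 'PublicCreatorBroadcast', 'title': 'a broadcast'},
        {'__typename': 'PublicUser', 'id': 'u1', 'username': 'soeji'},
    ]}

    uploader = next(
        (ref.get('username') for ref in nuxt_data.get('ssrRefs', [])
         if ref.get('__typename') == 'PublicUser'),
        None)  # get_all=False -> first match, or None when nothing matches

    assert uploader == 'soeji'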
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index 4abc2cfd0..b285464ec 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -31,7 +31,7 @@ class FoxgayIE(InfoExtractor):
description = get_element_by_id('inf_tit', webpage)
# The default user-agent with foxgay cookies leads to pages without videos
- self._downloader.cookiejar.clear('.foxgay.com')
+ self.cookiejar.clear('.foxgay.com')

# Find the URL for the iFrame which contains the actual video.
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
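This hunk, like the fc2 one above, goes through an extractor-level cookiejar accessor rather than the private _downloader attribute; presumably something along these lines on the base class (a sketch, not the actual definition):

    class InfoExtractor:
        def __init__(self, downloader=None):
            self._downloader = downloader

        @property
        def cookiejar(self):
            # Proxy the downloader's jar so subclasses never have to
            # reach into _downloader directly.
            return self._downloader.cookiejar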
diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py
index cee4d6b49..e8513f2c2 100644
--- a/yt_dlp/extractor/foxnews.py
+++ b/yt_dlp/extractor/foxnews.py
@@ -59,10 +59,13 @@ class FoxNewsIE(AMPIE):
@staticmethod
def _extract_urls(webpage):
return [
- mobj.group('url')
+ f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
for mobj in re.finditer(
- r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
- webpage)]
+ r'''(?x)
+ <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
+ (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
+ (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
+ ''', webpage)]

def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
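The widened pattern also catches embed.js script tags and rebuilds a canonical embed URL from the captured id instead of reusing the matched src. Exercising it against hypothetical markup (the id below is made up):

    import re

    FOXNEWS_EMBED_RE = r'''(?x)
        <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
        (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
        (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
    '''

    page = '<script src="//video.foxnews.com/v/embed.js?id=1234567890"></script>'
    urls = [
        f'https://video.foxnews.com/v/video-embed.html?video_id={m.group("video_id")}'
        for m in re.finditer(FOXNEWS_EMBED_RE, page)]
    assert urls == ['https://video.foxnews.com/v/video-embed.html?video_id=1234567890']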
diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py
deleted file mode 100644
index 6bd9912f3..000000000
--- a/yt_dlp/extractor/franceculture.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import re
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- extract_attributes,
- int_or_none,
- traverse_obj,
- unified_strdate,
-)
-
-
-class FranceCultureIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- # playlist
- 'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
- 'playlist_count': 12,
- 'info_dict': {
- 'id': 'hasta-dente',
- 'title': 'Hasta Dente',
- 'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20201024',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
- 'ext': 'mp3',
- 'title': 'Jeudi, vous avez dit bizarre ?',
- 'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
- 'duration': 604,
- 'upload_date': '20201024',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1603576680
- },
- },
- ],
- }, {
- 'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
- 'info_dict': {
- 'id': 'rendez-vous-au-pays-des-geeks',
- 'display_id': 'rendez-vous-au-pays-des-geeks',
- 'ext': 'mp3',
- 'title': 'Rendez-vous au pays des geeks',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20140301',
- 'vcodec': 'none',
- 'duration': 3569,
- },
- }, {
- # no thumbnail
- 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- info = {
- 'id': display_id,
- 'title': self._html_search_regex(
- r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
- webpage, 'title', default=self._og_search_title(webpage)),
- 'description': self._html_search_regex(
- r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': self._html_search_regex(
- r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
- 'upload_date': unified_strdate(self._html_search_regex(
- r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
- }
-
- playlist_data = self._search_regex(
- r'''(?sx)
- <section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
- (.*?)
- </section>
- ''',
- webpage, 'playlist data', fatal=False, default=None)
-
- if playlist_data:
- entries = []
- for item, item_description in re.findall(
- r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
- playlist_data):
-
- item_attributes = extract_attributes(item)
- entries.append({
- 'id': item_attributes.get('data-emission-uuid'),
- 'url': item_attributes.get('data-url'),
- 'title': item_attributes.get('data-diffusion-title'),
- 'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')),
- 'description': item_description,
- 'timestamp': int_or_none(item_attributes.get('data-start-time')),
- 'thumbnail': info['thumbnail'],
- 'uploader': info['uploader'],
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- **info
- }
-
- video_data = extract_attributes(self._search_regex(
- r'''(?sx)
- (?:
- </h1>|
- <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
- ).*?
- (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
- ''',
- webpage, 'video data'))
- video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
- ext = determine_ext(video_url.lower())
-
- return {
- 'display_id': display_id,
- 'url': video_url,
- 'ext': ext,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'duration': int_or_none(video_data.get('data-duration')),
- **info
- }
diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py
new file mode 100644
index 000000000..f38bae90b
--- /dev/null
+++ b/yt_dlp/extractor/freetv.py
@@ -0,0 +1,141 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, urlencode_postdata
+
+
+class FreeTvBaseIE(InfoExtractor):
+ def _get_api_response(self, content_id, resource_type, postdata):
+ return self._download_json(
+ 'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
+ content_id, data=urlencode_postdata(postdata),
+ note=f'Downloading {content_id} {resource_type} JSON')['data']
+
+
+class FreeTvMoviesIE(FreeTvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
+ 'md5': 'dc62d5abf0514726640077cd1591aa92',
+ 'info_dict': {
+ 'id': '428021',
+ 'title': 'Atrápame Si Puedes',
+ 'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'url': 'https://www.freetv.com/peliculas/monstruoso/',
+ 'md5': '509c15c68de41cb708d1f92d071f20aa',
+ 'info_dict': {
+ 'id': '377652',
+ 'title': 'Monstruoso',
+ 'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _extract_video(self, content_id, action='olyott_video_play'):
+ api_response = self._get_api_response(content_id, 'video', {
+ 'action': action,
+ 'contentID': content_id,
+ })
+
+ video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(api_response, ('displayMeta', 'title')),
+ 'description': traverse_obj(api_response, ('displayMeta', 'desc')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_video(
+ self._search_regex((
+ r'class=["\'][^>]+postid-(?P<video_id>\d+)',
+ r'<link[^>]+freetv.com/\?p=(?P<video_id>\d+)',
+ r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
+ ), webpage, 'video id', group='video_id'))
+
+
+class FreeTvIE(FreeTvBaseIE):
+ IE_NAME = 'freetv:series'
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/series/el-detective-l/',
+ 'info_dict': {
+ 'id': 'el-detective-l',
+ 'title': 'El Detective L',
+ 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
+ },
+ 'playlist_count': 24,
+ }, {
+ 'url': 'https://www.freetv.com/series/esmeraldas/',
+ 'info_dict': {
+ 'id': 'esmeraldas',
+ 'title': 'Esmeraldas',
+ 'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
+ },
+ 'playlist_count': 62,
+ }, {
+ 'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
+ 'info_dict': {
+ 'id': 'las-aventuras-de-leonardo',
+ 'title': 'Las Aventuras de Leonardo',
+ 'description': 'md5:0c47130846c141120a382aca059288f6'
+ },
+ 'playlist_count': 13,
+ },
+ ]
+
+ def _extract_series_season(self, season_id, series_title):
+ episodes = self._get_api_response(season_id, 'series', {
+ 'contentID': season_id,
+ 'action': 'olyott_get_dynamic_series_content',
+ 'type': 'list',
+ 'perPage': '1000',
+ })['1']
+
+ for episode in episodes:
+ video_id = str(episode['contentID'])
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
+ self._sort_formats(formats)
+
+ yield {
+ 'id': video_id,
+ 'title': episode.get('fullTitle'),
+ 'description': episode.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': episode.get('thumbnail'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
+ 'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
+ 'season_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
+ 'episode_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
+ description = self._html_search_regex(
+ r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
+ webpage, 'description', group='description', fatal=False)
+
+ return self.playlist_result(
+ itertools.chain.from_iterable(
+ self._extract_series_season(season_id, title)
+ for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
+ display_id, title, description)
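Everything here goes through the site's admin-ajax.php endpoint with form-encoded POST bodies. urlencode_postdata from yt_dlp.utils is essentially urllib.parse.urlencode(...).encode(), i.e. the request body for the movie lookup looks like:

    from urllib.parse import urlencode

    postdata = {'action': 'olyott_video_play', 'contentID': '428021'}
    body = urlencode(postdata).encode()  # what urlencode_postdata produces
    assert body == b'action=olyott_video_play&contentID=428021'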
diff --git a/yt_dlp/extractor/fuyintv.py b/yt_dlp/extractor/fuyintv.py
new file mode 100644
index 000000000..197901d57
--- /dev/null
+++ b/yt_dlp/extractor/fuyintv.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class FuyinTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.fuyin.tv/html/2733/44129.html',
+ 'info_dict': {
+ 'id': '44129',
+ 'ext': 'mp4',
+ 'title': '第1集',
+ 'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://www.fuyin.tv/api/api/tv.movie/url',
+ video_id, query={'urlid': f'{video_id}'})
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('data', 'title')),
+ 'url': json_data['data']['url'],
+ 'ext': 'mp4',
+ 'description': self._html_search_meta('description', webpage),
+ }
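Tiny nit, not blocking: query={'urlid': f'{video_id}'} could simply be query={'urlid': video_id}, since _match_id already returns a string. And for reference, traverse_obj(json_data, ('data', 'title')) is a None-safe nested get, roughly:

    json_data = {'data': {'title': '第1集', 'url': 'https://example.com/v.mp4'}}

    def nested_get(obj, *path):
        # None-safe walk over a simple key path, akin to traverse_obj
        for key in path:
            if not isinstance(obj, dict):
                return None
            obj = obj.get(key)
        return obj

    assert nested_get(json_data, 'data', 'title') == '第1集'
    assert nested_get(json_data, 'data', 'missing', 'deeper') is None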
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index f594d02c2..c2f754453 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,5 +1,6 @@
import os
import re
+import urllib.parse
import xml.etree.ElementTree

from .ant1newsgr import Ant1NewsGrEmbedIE
@@ -69,11 +70,13 @@ from .spankwire import SpankwireIE
from .sportbox import SportBoxIE
from .spotify import SpotifyBaseIE
from .springboardplatform import SpringboardPlatformIE
+from .substack import SubstackIE
from .svt import SVTIE
from .teachable import TeachableIE
from .ted import TedEmbedIE
from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE
+from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE
from .tunein import TuneInBaseIE
@@ -104,12 +107,7 @@ from .yapfiles import YapFilesIE
from .youporn import YouPornIE
from .youtube import YoutubeIE
from .zype import ZypeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -129,6 +127,7 @@ from ..utils import (
sanitized_Request,
smuggle_url,
str_or_none,
+ try_call,
unescapeHTML,
unified_timestamp,
unsmuggle_url,
@@ -2526,6 +2525,118 @@ class GenericIE(InfoExtractor):
'upload_date': '20220504',
},
},
+ {
+ # Webpage contains double BOM
+ 'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
+ 'md5': 'df02cadc719dcc63d43288366f037754',
+ 'info_dict': {
+ 'id': 'paris-d-moll',
+ 'ext': 'mp4',
+ 'upload_date': '20220518',
+ 'title': 'Paris d-moll',
+ 'description': 'md5:319e37ea5542293db37e1e13072fe330',
+ 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
+ 'timestamp': 1652833414,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
+ 'md5': '198bde8bed23d0b23c70725c83c9b6d9',
+ 'info_dict': {
+ 'id': '53602801',
+ 'ext': 'mpga',
+ 'title': 'Interstellar',
+ 'description': 'Listen now | Episode One',
+ 'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
+ 'uploader': 'Molly Movie Club',
+ 'uploader_id': '839621',
+ },
+ },
+ {
+ 'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
+ 'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
+ 'info_dict': {
+ 'id': '57962052',
+ 'ext': 'mpga',
+ 'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
+ 'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
+ 'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
+ 'uploader': 'Blocked and Reported',
+ 'uploader_id': '500230',
+ },
+ },
+ {
+ 'url': 'https://www.skimag.com/video/ski-people-1980/',
+ 'info_dict': {
+ 'id': 'ski-people-1980',
+ 'title': 'Ski People (1980)',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': '022a7e31c70620ebec18deeab376ee03',
+ 'info_dict': {
+ 'id': 'YTmgRiNU',
+ 'ext': 'mp4',
+ 'title': '1980 Ski People',
+ 'timestamp': 1610407738,
+ 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+ 'duration': 5688.0,
+ 'upload_date': '20210111',
+ }
+ }]
+ },
+ {
+ 'note': 'Rumble embed',
+ 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+ 'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+ 'info_dict': {
+ 'id': 'vb0ofn',
+ 'ext': 'mp4',
+ 'timestamp': 1612662578,
+ 'uploader': 'LovingMontana',
+ 'channel': 'LovingMontana',
+ 'upload_date': '20210207',
+ 'title': 'Winter-loving dog helps girls dig a snow fort ',
+ 'channel_url': 'https://rumble.com/c/c-546523',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
+ 'duration': 103,
+ }
+ },
+ {
+ 'note': 'Rumble JS embed',
+ 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
+ 'md5': '4701209ac99095592e73dbba21889690',
+ 'info_dict': {
+ 'id': 'v15eqxl',
+ 'ext': 'mp4',
+ 'channel': 'Mr Producer Media',
+ 'duration': 92,
+ 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
+ 'channel_url': 'https://rumble.com/c/RichSementa',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+ 'timestamp': 1654892716,
+ 'uploader': 'Mr Producer Media',
+ 'upload_date': '20220610',
+ }
+ },
+ {
+ 'note': 'JSON LD with multiple @type',
+ 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
+ 'md5': 'c7949f34f57273013fb7ccb1156393db',
+ 'info_dict': {
+ 'id': 'ipy2AcGL',
+ 'ext': 'mp4',
+ 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
+ 'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
+ 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
+ 'timestamp': 1586577474,
+ 'upload_date': '20200411',
+ 'age_limit': 0,
+ 'duration': 111.0,
+ }
+ },
]

def report_following_redirect(self, new_url):
@@ -2536,66 +2647,44 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug(f'Identified a {name}')
def _extract_rss(self, url, video_id, doc):
- playlist_title = doc.find('./channel/title').text
- playlist_desc_el = doc.find('./channel/description')
- playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-
NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
}

entries = []
for it in doc.findall('./channel/item'):
- next_url = None
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
-
- if not next_url:
- next_url = xpath_text(it, 'link', fatal=False)
-
+ next_url = next(
+ (e.attrib.get('url') for e in it.findall('./enclosure')),
+ xpath_text(it, 'link', fatal=False))
if not next_url:
continue
- if it.find('guid').text is not None:
- next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})
+ guid = try_call(lambda: it.find('guid').text)
+ if guid:
+ next_url = smuggle_url(next_url, {'force_videoid': guid})

def itunes(key):
- return xpath_text(
- it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
- default=None)
-
- duration = itunes('duration')
- explicit = (itunes('explicit') or '').lower()
- if explicit in ('true', 'yes'):
- age_limit = 18
- elif explicit in ('false', 'no'):
- age_limit = 0
- else:
- age_limit = None
+ return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)

entries.append({
'_type': 'url_transparent',
'url': next_url,
- 'title': it.find('title').text,
+ 'title': try_call(lambda: it.find('title').text),
'description': xpath_text(it, 'description', default=None),
- 'timestamp': unified_timestamp(
- xpath_text(it, 'pubDate', default=None)),
- 'duration': int_or_none(duration) or parse_duration(duration),
+ 'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
+ 'duration': parse_duration(itunes('duration')),
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
'episode': itunes('title'),
'episode_number': int_or_none(itunes('episode')),
'season_number': int_or_none(itunes('season')),
- 'age_limit': age_limit,
+ 'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
})

return {
'_type': 'playlist',
'id': url,
- 'title': playlist_title,
- 'description': playlist_desc,
+ 'title': try_call(lambda: doc.find('./channel/title').text),
+ 'description': try_call(lambda: doc.find('./channel/description').text),
'entries': entries,
}
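Two small idioms carry this rewrite: try_call absorbs the AttributeError when find() returns no element, and the dict lookup replaces the old explicit/age_limit ladder while still yielding None for unrecognized values. Simplified sketches (the real try_call accepts several callables and catches a wider set of exceptions):

    def try_call(fn):
        try:
            return fn()
        except (AttributeError, KeyError, TypeError, IndexError):
            return None

    missing = None  # e.g. it.find('guid') when the item has no <guid>
    assert try_call(lambda: missing.text) is None

    age_map = {'true': 18, 'yes': 18, 'false': 0, 'no': 0}
    assert age_map.get('yes') == 18
    assert age_map.get('clean') is None  # unknown ratings stay unset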
@@ -2610,7 +2699,7 @@ class GenericIE(InfoExtractor):
title = self._html_search_meta('DC.title', webpage, fatal=True)
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
@@ -2626,7 +2715,7 @@ class GenericIE(InfoExtractor):
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
- 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
@@ -2678,7 +2767,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
- parsed_url = compat_urlparse.urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@@ -2754,7 +2843,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- format_id = compat_str(m.group('format_id'))
+ format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
@@ -2873,7 +2962,7 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
# FIXME: unescaping the whole page may break URLs, commenting out for now.
# There probably should be a second run of generic extractor on unescaped webpage.
- # webpage = compat_urllib_parse_unquote(webpage)
+ # webpage = urllib.parse.unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -2975,7 +3064,7 @@ class GenericIE(InfoExtractor):
if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
- vhx_url = VHXEmbedIE._extract_url(webpage)
+ vhx_url = VHXEmbedIE._extract_url(url, webpage)
if vhx_url:
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
@@ -3023,6 +3112,7 @@ class GenericIE(InfoExtractor):
wistia_urls = WistiaIE._extract_urls(webpage)
if wistia_urls:
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ playlist['entries'] = list(playlist['entries'])
for entry in playlist['entries']:
entry.update({
'_type': 'url_transparent',
@@ -3042,6 +3132,11 @@ class GenericIE(InfoExtractor):
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
+ # Check for Substack custom domains
+ substack_url = SubstackIE._extract_url(webpage, url)
+ if substack_url:
+ return self.url_result(substack_url, SubstackIE)
+
# Look for embedded Vevo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
@@ -3140,7 +3235,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+ return self.url_result(urllib.parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -3393,7 +3488,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+ urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
# Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@@ -3626,7 +3721,7 @@ class GenericIE(InfoExtractor):
if mediasite_urls:
entries = [
self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
+ urllib.parse.urljoin(url, mediasite_url),
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
@@ -3762,6 +3857,11 @@ class GenericIE(InfoExtractor):
if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title)
+ # Look for Tiktok embeds
+ tiktok_urls = TikTokIE._extract_urls(webpage)
+ if tiktok_urls:
+ return self.playlist_from_matches(tiktok_urls, video_id, video_title)
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@@ -3816,11 +3916,11 @@ class GenericIE(InfoExtractor):
subtitles = {}
for source in sources:
src = source.get('src')
- if not src or not isinstance(src, compat_str):
+ if not src or not isinstance(src, str):
continue
- src = compat_urlparse.urljoin(url, src)
+ src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
- if isinstance(src_type, compat_str):
+ if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
@@ -3854,7 +3954,7 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
- 'url': compat_urlparse.urljoin(url, src),
+ 'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
'Referer': full_response.geturl(),
@@ -3871,22 +3971,17 @@ class GenericIE(InfoExtractor):
json_ld = self._search_json_ld(webpage, video_id, default={})
if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
- if determine_ext(json_ld['url']) == 'm3u8':
- json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
- json_ld['url'], video_id, 'mp4')
- json_ld.pop('url')
- self._sort_formats(json_ld['formats'])
- else:
- json_ld['_type'] = 'url_transparent'
- json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
- return merge_dicts(json_ld, info_dict)
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True}),
+ }, json_ld, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
if RtmpIE.suitable(vurl):
return True
- vpath = compat_urlparse.urlparse(vurl).path
+ vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -4014,7 +4109,7 @@ class GenericIE(InfoExtractor):
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@@ -4040,8 +4135,8 @@ class GenericIE(InfoExtractor):
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
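
The JSON-LD hunk above replaces the old m3u8 special case with a single url_transparent result built via merge_dicts(). A minimal sketch of why the argument order works, assuming merge_dicts() from yt_dlp.utils keeps the first non-empty value seen for each key (so the overrides win and json_ld/info_dict only fill the gaps):

    from yt_dlp.utils import merge_dicts

    overrides = {'_type': 'url_transparent', 'url': 'https://example.com/smuggled'}
    json_ld = {'url': 'https://example.com/from-json-ld', 'title': 'From JSON-LD'}
    info_dict = {'id': 'xyz', 'title': 'fallback title'}

    merged = merge_dicts(overrides, json_ld, info_dict)
    print(merged['url'])    # https://example.com/smuggled  (override wins)
    print(merged['title'])  # From JSON-LD                  (first dict to set it)
    print(merged['id'])     # xyz                           (filled from info_dict)
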
diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py
index 9e835a6da..e728598f7 100644
--- a/yt_dlp/extractor/giga.py
+++ b/yt_dlp/extractor/giga.py
@@ -1,13 +1,8 @@
import itertools
from .common import InfoExtractor
-from ..utils import (
- qualities,
- compat_str,
- parse_duration,
- parse_iso8601,
- str_to_int,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index c0905f86a..d7475b6da 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -264,7 +264,7 @@ class GoogleDriveIE(InfoExtractor):
subtitles_id = ttsurl.encode('utf-8').decode(
'unicode_escape').split('=')[-1]
- self._downloader.cookiejar.clear(domain='.google.com', path='/', name='NID')
+ self.cookiejar.clear(domain='.google.com', path='/', name='NID')
return {
'id': video_id,
@@ -276,3 +276,59 @@ class GoogleDriveIE(InfoExtractor):
'automatic_captions': self.extract_automatic_captions(
video_id, subtitles_id, hl),
}
+
+
+class GoogleDriveFolderIE(InfoExtractor):
+ IE_NAME = 'GoogleDrive:Folder'
+ _VALID_URL = r'https?://(?:docs|drive)\.google\.com/drive/folders/(?P<id>[\w-]{28,})'
+ _TESTS = [{
+ 'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'info_dict': {
+ 'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'title': 'Forrest'
+ },
+ 'playlist_count': 3,
+ }]
+ _BOUNDARY = '=====vc17a3rwnndj====='
+ _REQUEST = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'{folder_id}'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken={page_token}&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key={key} HTTP/1.1"
+ _DATA = f'''--{_BOUNDARY}
+content-type: application/http
+content-transfer-encoding: binary
+
+GET %s
+
+--{_BOUNDARY}
+'''
+
+ def _call_api(self, folder_id, key, data, **kwargs):
+ response = self._download_webpage(
+ 'https://clients6.google.com/batch/drive/v2beta',
+ folder_id, data=data.encode('utf-8'),
+ headers={
+ 'Content-Type': 'text/plain;charset=UTF-8;',
+ 'Origin': 'https://drive.google.com',
+ }, query={
+ '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"',
+ 'key': key
+ }, **kwargs)
+ return self._search_json('', response, 'api response', folder_id, **kwargs) or {}
+
+ def _get_folder_items(self, folder_id, key):
+ page_token = ''
+ while page_token is not None:
+ request = self._REQUEST.format(folder_id=folder_id, page_token=page_token, key=key)
+ page = self._call_api(folder_id, key, self._DATA % request)
+ yield from page['items']
+ page_token = page.get('nextPageToken')
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, folder_id)
+ key = self._search_regex(r'"(\w{39})"', webpage, 'key')
+
+ folder_info = self._call_api(folder_id, key, self._DATA % f'/drive/v2beta/files/{folder_id} HTTP/1.1', fatal=False)
+
+ return self.playlist_from_matches(
+ self._get_folder_items(folder_id, key), folder_id, folder_info.get('title'),
+ ie=GoogleDriveIE, getter=lambda item: f'https://drive.google.com/file/d/{item["id"]}')
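
The new folder extractor talks to the Drive batch endpoint, wrapping each API call in a multipart/mixed body whose single part is itself a raw HTTP request, as _DATA does above. A standalone sketch of that body shape (boundary and path here are illustrative, not the production values):

    BOUNDARY = '=====example-boundary====='

    def build_batch_body(request_line):
        # One part per inner request; the server unpacks each part,
        # executes it, and returns the responses in a single round trip.
        return (f'--{BOUNDARY}\n'
                'content-type: application/http\n'
                'content-transfer-encoding: binary\n'
                '\n'
                f'GET {request_line}\n'
                '\n'
                f'--{BOUNDARY}\n')

    print(build_batch_body('/drive/v2beta/files/SOME_FOLDER_ID HTTP/1.1'))
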
diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py
index a7e4424b6..6ecdd390c 100644
--- a/yt_dlp/extractor/hitbox.py
+++ b/yt_dlp/extractor/hitbox.py
@@ -1,13 +1,13 @@
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- parse_iso8601,
+ determine_ext,
float_or_none,
int_or_none,
- compat_str,
- determine_ext,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py
index 56038f1ca..9e2c9cf47 100644
--- a/yt_dlp/extractor/ina.py
+++ b/yt_dlp/extractor/ina.py
@@ -1,23 +1,19 @@
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- strip_or_none,
- xpath_attr,
- xpath_text,
-)
+from ..utils import unified_strdate
class InaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:[^/]+/)?(?:video|audio)/(?P<id>\w+)'
_TESTS = [{
- 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
- 'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
+ 'url': 'https://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+ 'md5': 'c5a09e5cb5604ed10709f06e7a377dda',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
- 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
+ 'description': 'md5:08201f1c86fb250611f0ba415d21255a',
+ 'upload_date': '20070712',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg',
}
}, {
'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
@@ -31,53 +27,37 @@ class InaIE(InfoExtractor):
}, {
'url': 'http://m.ina.fr/video/I12055569',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/video/cpb8205116303/les-jeux-electroniques',
+ 'md5': '4b8284a9a3a184fdc7e744225b8251e7',
+ 'info_dict': {
+ 'id': 'CPB8205116303',
+ 'ext': 'mp4',
+ 'title': 'Les jeux électroniques',
+ 'description': 'md5:e09f7683dad1cc60b74950490127d233',
+ 'upload_date': '19821204',
+ 'duration': 657,
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/203/CPB8205116303.jpeg',
+ }
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- info_doc = self._download_xml(
- 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
- item = info_doc.find('channel/item')
- title = xpath_text(item, 'title', fatal=True)
- media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
- content = item.find(media_ns_xpath('content'))
+ video_id = self._match_id(url).upper()
+ webpage = self._download_webpage(url, video_id)
- get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
- formats = []
- for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
- q_url = get_furl(q)
- if not q_url:
- continue
- formats.append({
- 'format_id': q,
- 'url': q_url,
- 'width': w,
- 'height': h,
- })
- if not formats:
- furl = get_furl('player') or content.attrib['url']
- ext = determine_ext(furl)
- formats = [{
- 'url': furl,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'ext': ext,
- }]
+ api_url = self._html_search_regex(
+ r'asset-details-url\s*=\s*["\'](?P<api_url>[^"\']+)',
+ webpage, 'api_url').replace(video_id, f'{video_id}.json')
- thumbnails = []
- for thumbnail in content.findall(media_ns_xpath('thumbnail')):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'height': int_or_none(thumbnail.get('height')),
- 'width': int_or_none(thumbnail.get('width')),
- })
+ api_response = self._download_json(api_url, video_id)
return {
'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': strip_or_none(xpath_text(item, 'description')),
- 'thumbnails': thumbnails,
+ 'url': api_response['resourceUrl'],
+ 'ext': {'video': 'mp4', 'audio': 'mp3'}.get(api_response.get('type')),
+ 'title': api_response.get('title'),
+ 'description': api_response.get('description'),
+ 'upload_date': unified_strdate(api_response.get('dateOfBroadcast')),
+ 'duration': api_response.get('duration'),
+ 'thumbnail': api_response.get('resourceThumbnail'),
}
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 05000e2fb..5a824b500 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -410,7 +410,7 @@ class InstagramIE(InstagramBaseIE):
if nodes:
return self.playlist_result(
self._extract_nodes(nodes, True), video_id,
- format_field(username, template='Post by %s'), description)
+ format_field(username, None, 'Post by %s'), description)
video_url = self._og_search_video_url(webpage, secure=False)
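
This is the first of many format_field() migrations in this diff (iqiyi, joj, keezmovies, lastfm, line, medaltv and minds below follow the same pattern): the template moves from a keyword to the third positional argument, with the second argument naming the field to read (None meaning "use the value itself"). A sketch of the call convention, assuming the yt_dlp.utils signature at the time of this diff:

    from yt_dlp.utils import format_field

    print(format_field('owner42', None, 'Post by %s'))  # Post by owner42
    print(format_field(None, None, 'Post by %s'))       # '' (falls back to the empty default)
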
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index a0298f1a1..5c316687c 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -351,7 +351,7 @@ class IqIE(InfoExtractor):
'''
def _extract_vms_player_js(self, webpage, video_id):
- player_js_cache = self._downloader.cache.load('iq', 'player_js')
+ player_js_cache = self.cache.load('iq', 'player_js')
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
@@ -364,7 +364,7 @@ class IqIE(InfoExtractor):
f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
- self._downloader.cache.store('iq', 'player_js', module_js)
+ self.cache.store('iq', 'player_js', module_js)
return module_js
raise ExtractorError('Unable to extract player JS')
@@ -440,7 +440,7 @@ class IqIE(InfoExtractor):
preview_time = traverse_obj(
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
- self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds'))
+ self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))
# TODO: Extract audio-only formats
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 4b88da35f..f77c5d44d 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,15 +1,16 @@
+import itertools
import re
-import urllib
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
int_or_none,
mimetype2ext,
remove_end,
+ strip_or_none,
+ unified_strdate,
url_or_none,
urljoin,
- unified_strdate,
- strip_or_none,
)
@@ -171,37 +172,70 @@ class IwaraUserIE(IwaraBaseIE):
IE_NAME = 'iwara:user'
_TESTS = [{
- 'url': 'https://ecchi.iwara.tv/users/CuteMMD',
+ 'note': 'the "all videos" page is a single page; fewer than 40 videos',
+ 'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
'info_dict': {
- 'id': 'CuteMMD',
+ 'title': 'Uploaded videos from Infinity_YukariP',
+ 'id': 'infinityyukarip',
+ 'uploader': 'Infinity_YukariP',
+ 'uploader_id': 'infinityyukarip',
},
- 'playlist_mincount': 198,
+ 'playlist_mincount': 39,
}, {
- # urlencoded
- 'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF',
+ 'note': 'no "all videos" page at all; probably fewer than 10 videos',
+ 'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
'info_dict': {
- 'id': '咕嘿嘿',
+ 'title': 'Uploaded videos from mmd quintet',
+ 'id': 'mmd-quintet',
+ 'uploader': 'mmd quintet',
+ 'uploader_id': 'mmd-quintet',
},
- 'playlist_mincount': 141,
+ 'playlist_mincount': 6,
+ }, {
+ 'note': 'has paging; more than 40 videos',
+ 'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
+ 'info_dict': {
+ 'title': 'Uploaded videos from TheBlackbirdCalls',
+ 'id': 'theblackbirdcalls',
+ 'uploader': 'TheBlackbirdCalls',
+ 'uploader_id': 'theblackbirdcalls',
+ },
+ 'playlist_mincount': 420,
+ }, {
+ 'note': 'non-ASCII characters in the URL',
+ 'url': 'https://ecchi.iwara.tv/users/ぶた丼',
+ 'info_dict': {
+ 'title': 'Uploaded videos from ぶた丼',
+ 'id': 'ぶた丼',
+ 'uploader': 'ぶた丼',
+ 'uploader_id': 'ぶた丼',
+ },
+ 'playlist_mincount': 170,
}]
- def _entries(self, playlist_id, base_url, webpage):
- yield from self._extract_playlist(base_url, webpage)
-
- page_urls = re.findall(
- r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage)
-
- for n, path in enumerate(page_urls, 2):
+ def _entries(self, playlist_id, base_url):
+ webpage = self._download_webpage(
+ f'{base_url}/users/{playlist_id}', playlist_id)
+ videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None)
+ if not videos_url:
+ yield from self._extract_playlist(base_url, webpage)
+ return
+
+ videos_url = urljoin(base_url, videos_url)
+
+ for n in itertools.count(1):
+ page = self._download_webpage(
+ videos_url, playlist_id, note=f'Downloading playlist page {n}',
+ query={'page': str(n - 1)} if n > 1 else {})
yield from self._extract_playlist(
- base_url, self._download_webpage(
- urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}'))
+ base_url, page)
+
+ if f'page={n}' not in page:
+ break
def _real_extract(self, url):
playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
playlist_id = urllib.parse.unquote(playlist_id)
- webpage = self._download_webpage(
- f'{base_url}/users/{playlist_id}/videos', playlist_id)
-
return self.playlist_result(
- self._entries(playlist_id, base_url, webpage), playlist_id)
+ self._entries(playlist_id, base_url), playlist_id)
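
The rewritten pager above rests on a simple heuristic: iwara's listing pages are 0-indexed via ?page=N, and a page links forward with "page=N+1" only when another page exists. The same loop in isolation, over fake pages:

    import itertools

    def paged(fetch_page):
        # fetch_page(zero_based_index) -> HTML of that listing page
        for n in itertools.count(1):
            html = fetch_page(n - 1)
            yield html  # the extractor yields the parsed entries instead
            if f'page={n}' not in html:  # no link to the next page: done
                break

    fake_site = ['<a href="?page=1">', '<a href="?page=2">', 'last page']
    print(len(list(paged(lambda i: fake_site[i]))))  # 3
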
diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py
new file mode 100644
index 000000000..163edf480
--- /dev/null
+++ b/yt_dlp/extractor/ixigua.py
@@ -0,0 +1,84 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class IxiguaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
+ _TESTS = [{
+ 'url': 'https://www.ixigua.com/6996881461559165471',
+ 'info_dict': {
+ 'id': '6996881461559165471',
+ 'ext': 'mp4',
+ 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
+ 'description': 'md5:8c82f46186299add4a1c455430740229',
+ 'tags': ['video_car'],
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'uploader': '懂车帝原创',
+ 'uploader_id': '6480145787',
+ 'thumbnail': r're:^https?://.+\.(avif|webp)',
+ 'timestamp': 1629088414,
+ 'duration': 1030,
+ }
+ }]
+
+ def _get_json_data(self, webpage, video_id):
+ js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
+ if not js_data:
+ if self._cookies_passed:
+ raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
+ raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
+
+ return self._parse_json(
+ js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)
+
+ def _media_selector(self, json_data):
+ for path, override in (
+ (('video_list', ), {}),
+ (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
+ (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
+ ):
+ for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
+ yield {
+ 'url': base64.b64decode(media['main_url']).decode(),
+ 'width': int_or_none(media.get('vwidth')),
+ 'height': int_or_none(media.get('vheight')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': media.get('codec_type'),
+ 'format_id': str_or_none(media.get('quality_type')),
+ 'filesize': int_or_none(media.get('size')),
+ 'ext': 'mp4',
+ **override,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
+
+ formats = list(self._media_selector(json_data.get('videoResource')))
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title'),
+ 'description': json_data.get('video_abstract'),
+ 'formats': formats,
+ 'like_count': json_data.get('video_like_count'),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'tags': [json_data.get('tag')],
+ 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
+ 'uploader': traverse_obj(json_data, ('user_info', 'name')),
+ 'view_count': json_data.get('video_watch_count'),
+ 'dislike_count': json_data.get('video_unlike_count'),
+ 'timestamp': int_or_none(json_data.get('video_publish_time')),
+ }
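
The media URLs in ixigua's SSR_HYDRATED_DATA arrive base64-encoded in main_url, which is why _media_selector decodes every entry before emitting a format. A toy round trip (the URL is made up):

    import base64

    media = {'main_url': base64.b64encode(b'https://v.example.com/video.mp4').decode()}
    print(base64.b64decode(media['main_url']).decode())  # https://v.example.com/video.mp4
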
diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py
index a01411be1..1c4676e95 100644
--- a/yt_dlp/extractor/joj.py
+++ b/yt_dlp/extractor/joj.py
@@ -70,7 +70,7 @@ class JojIE(InfoExtractor):
r'(\d+)[pP]\.', format_url, 'height', default=None)
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%sp'),
+ 'format_id': format_field(height, None, '%sp'),
'height': int(height),
})
if not formats:
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index 8dbbb2926..2cb7ca3d7 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -5,7 +5,7 @@ from ..utils import unsmuggle_url
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
@@ -37,6 +37,9 @@ class JWPlatformIE(InfoExtractor):
webpage)
if ret:
return ret
+ mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
+ if mobj:
+ return [f'jwplatform:{mobj.group(1)}']
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
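
The new branch lets the generic extractor pick up pages that only carry the media id in a data attribute. A sketch of what it matches (the markup is illustrative):

    import re

    webpage = '<div class="player" data-video-jw-id="nPripu9l"></div>'
    mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
    if mobj:
        print(f'jwplatform:{mobj.group(1)}')  # jwplatform:nPripu9l
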
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index afad279bd..f4092aa71 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -382,5 +382,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': format_field(info, 'userId', ignore=('None', None)),
- 'view_count': info.get('plays'),
+ 'view_count': int_or_none(info.get('plays')),
}
diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py
index 79f9c7fa7..1c2d5c01c 100644
--- a/yt_dlp/extractor/keezmovies.py
+++ b/yt_dlp/extractor/keezmovies.py
@@ -68,7 +68,7 @@ class KeezMoviesIE(InfoExtractor):
video_url, title, 32).decode('utf-8')
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
'tbr': tbr,
})
diff --git a/yt_dlp/extractor/kicker.py b/yt_dlp/extractor/kicker.py
new file mode 100644
index 000000000..a2c7dd4e8
--- /dev/null
+++ b/yt_dlp/extractor/kicker.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+
+
+class KickerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)kicker\.(?:de)/(?P<id>[\w-]+)/video'
+ _TESTS = [{
+ 'url': 'https://www.kicker.de/pogba-dembel-co-die-top-11-der-abloesefreien-spieler-905049/video',
+ 'info_dict': {
+ 'id': 'km04mrK0DrRAVxy2GcA',
+ 'title': 'md5:b91d145bac5745ac58d5479d8347a875',
+ 'ext': 'mp4',
+ 'duration': 350,
+ 'description': 'md5:a5a3dd77dbb6550dbfb997be100b9998',
+ 'uploader_id': 'x2dfupo',
+ 'timestamp': 1654677626,
+ 'like_count': int,
+ 'uploader': 'kicker.de',
+ 'view_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080',
+ 'tags': ['published', 'category.InternationalSoccer'],
+ 'upload_date': '20220608'
+ }
+ }, {
+ 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video',
+ 'info_dict': {
+ 'id': 'k2omNsJKdZ3TxwxYSFJ',
+ 'title': 'md5:72ec24d7f84b8436fe1e89d198152adf',
+ 'ext': 'mp4',
+ 'uploader_id': 'x2dfupo',
+ 'duration': 331,
+ 'timestamp': 1652966015,
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TxU4Z1YYCmtisTbMq/x1080',
+ 'tags': ['FELIX KROOS', 'EINFACH MAL LUPPEN', 'KROOS', 'FSV FORTUNA PANKOW', 'published', 'category.Amateurs', 'marketingpreset.Spreekick'],
+ 'age_limit': 0,
+ 'view_count': int,
+ 'upload_date': '20220519',
+ 'uploader': 'kicker.de',
+ 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f',
+ 'like_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_slug)
+ dailymotion_video_id = self._search_regex(
+ r'data-dmprivateid\s*=\s*[\'"](?P<video_id>\w+)', webpage,
+ 'video id', group='video_id')
+
+ return self.url_result(
+ f'https://www.dailymotion.com/video/{dailymotion_video_id}',
+ ie=DailymotionIE, video_title=self._html_extract_title(webpage))
diff --git a/yt_dlp/extractor/kth.py b/yt_dlp/extractor/kth.py
new file mode 100644
index 000000000..e17c6db91
--- /dev/null
+++ b/yt_dlp/extractor/kth.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KTHIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
+ 'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
+ 'info_dict': {
+ 'id': '0_uoop6oz9',
+ 'ext': 'mp4',
+ 'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
+ 'thumbnail': 're:https?://.+/thumbnail/.+',
+ 'duration': 3516,
+ 'timestamp': 1647345358,
+ 'upload_date': '20220315',
+ 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ result = self.url_result(
+ smuggle_url('kaltura:308:%s' % video_id, {
+ 'service_url': 'https://api.kaltura.nordu.net'}),
+ 'Kaltura')
+ return result
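
KTHIE is a thin wrapper: the Kaltura service URL rides along inside the smuggled URL and is recovered by KalturaIE on the other side. A sketch of the round trip using the real helpers from yt_dlp.utils:

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    smuggled = smuggle_url('kaltura:308:0_uoop6oz9',
                           {'service_url': 'https://api.kaltura.nordu.net'})
    url, data = unsmuggle_url(smuggled, {})
    print(url)   # kaltura:308:0_uoop6oz9
    print(data)  # {'service_url': 'https://api.kaltura.nordu.net'}
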
diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py
index f1221ef1b..4fec2c2b2 100644
--- a/yt_dlp/extractor/kusi.py
+++ b/yt_dlp/extractor/kusi.py
@@ -1,10 +1,10 @@
import random
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
timeconvert,
update_url_query,
xpath_text,
@@ -66,7 +66,7 @@ class KUSIIE(InfoExtractor):
formats = []
for quality in quality_options:
formats.append({
- 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+ 'url': urllib.parse.unquote_plus(quality.attrib['url']),
'height': int_or_none(quality.attrib.get('height')),
'width': int_or_none(quality.attrib.get('width')),
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
index 7ba666d06..f14198cfd 100644
--- a/yt_dlp/extractor/lastfm.py
+++ b/yt_dlp/extractor/lastfm.py
@@ -15,7 +15,7 @@ class LastFMPlaylistBaseIE(InfoExtractor):
for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
webpage = self._download_webpage(
url, playlist_id,
- note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')),
+ note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
query={'page': page_number})
page_entries = [
self.url_result(player_url, 'Youtube')
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index 953ce2e18..909720e8b 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -192,10 +192,11 @@ class LBRYIE(LBRYBaseIE):
claim_id, is_live = result['signing_channel']['claim_id'], True
headers = {'referer': 'https://player.odysee.live/'}
live_data = self._download_json(
- f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
+ 'https://api.odysee.live/livestream/is_live', claim_id,
+ query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
- streaming_url = final_url = live_data.get('url')
- if not final_url and not live_data.get('live'):
+ streaming_url = final_url = live_data.get('VideoURL')
+ if not final_url and not live_data.get('Live'):
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)
diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py
index 63b6c002a..09c512e50 100644
--- a/yt_dlp/extractor/line.py
+++ b/yt_dlp/extractor/line.py
@@ -34,7 +34,7 @@ class LineLiveBaseIE(InfoExtractor):
'timestamp': int_or_none(item.get('createdAt')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'),
'duration': int_or_none(item.get('archiveDuration')),
'view_count': int_or_none(item.get('viewerCount')),
'comment_count': int_or_none(item.get('chatCount')),
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
index 3bb52777f..9ea08ec5a 100644
--- a/yt_dlp/extractor/lnkgo.py
+++ b/yt_dlp/extractor/lnkgo.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- compat_str,
format_field,
int_or_none,
parse_iso8601,
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 527b50cb0..5f0a9b42f 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -116,7 +116,7 @@ class MedalTVIE(InfoExtractor):
author = try_get(
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
author_id = str_or_none(author.get('id'))
- author_url = format_field(author_id, template='https://medal.tv/users/%s')
+ author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
return {
'id': video_id,
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index 60c454dda..f396c1bd3 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -20,10 +20,10 @@ class MediasetIE(ThePlatformBaseIE):
(?:
mediaset:|
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:\w+\.)+mediaset\.it/
(?:
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
- player/index\.html\?.*?\bprogramGuid=
+ player/(?:v\d+/)?index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9A-Z]{16,})
'''
@@ -159,6 +159,12 @@ class MediasetIE(ThePlatformBaseIE):
}, {
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
'only_matching': True,
+ }, {
+ 'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323',
+ 'only_matching': True,
}]
@staticmethod
@@ -286,7 +292,7 @@ class MediasetShowIE(MediasetIE):
_VALID_URL = r'''(?x)
(?:
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (\w+\.)+mediaset\.it/
(?:
(?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
(?:[a-z-]+)_SE(?P<id>\d{12})
diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py
index 31fec86d2..048c74e68 100644
--- a/yt_dlp/extractor/metacafe.py
+++ b/yt_dlp/extractor/metacafe.py
@@ -1,17 +1,14 @@
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_parse_qs, compat_urllib_parse_unquote
from ..utils import (
- determine_ext,
ExtractorError,
- int_or_none,
+ determine_ext,
get_element_by_attribute,
+ int_or_none,
mimetype2ext,
)
@@ -143,7 +140,7 @@ class MetacafeIE(InfoExtractor):
headers = {
# Disable family filter
- 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
+ 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False}))
}
# AnyClip videos require the flashversion cookie so that we get the link
diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py
index 393d20604..8079bbb39 100644
--- a/yt_dlp/extractor/minds.py
+++ b/yt_dlp/extractor/minds.py
@@ -118,7 +118,7 @@ class MindsIE(MindsBaseIE):
'timestamp': int_or_none(entity.get('time_created')),
'uploader': strip_or_none(owner.get('name')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://www.minds.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://www.minds.com/%s'),
'view_count': int_or_none(entity.get('play:count')),
'like_count': int_or_none(entity.get('thumbs:up:count')),
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
diff --git a/yt_dlp/extractor/mirrorcouk.py b/yt_dlp/extractor/mirrorcouk.py
new file mode 100644
index 000000000..7b4f95b4b
--- /dev/null
+++ b/yt_dlp/extractor/mirrorcouk.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class MirrorCoUKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mirror\.co\.uk/[/+\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-island-fans-baffled-after-27163139',
+ 'info_dict': {
+ 'id': 'voyyS7SV',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Gemma Owen enters the villa',
+ 'description': 'Love Island: Michael Owen\'s daughter Gemma Owen enters the villa.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/voyyS7SV/poster.jpg?width=720',
+ 'display_id': '27163139',
+ 'timestamp': 1654547895,
+ 'duration': 57.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/michael-jacksons-son-blankets-new-25344890',
+ 'info_dict': {
+ 'id': 'jyXpdvxp',
+ 'ext': 'mp4',
+ 'title': 'Michael Jackson’s son Bigi calls for action on climate change',
+ 'description': 'md5:d39ceaba2b7a615b4ca6557e7bc40222',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jyXpdvxp/poster.jpg?width=720',
+ 'display_id': '25344890',
+ 'timestamp': 1635749907,
+ 'duration': 56.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/sport/football/news/antonio-conte-next-tottenham-manager-25346042',
+ 'info_dict': {
+ 'id': 'q6FkKa4p',
+ 'ext': 'mp4',
+ 'title': 'Nuno sacked by Tottenham after fifth Premier League defeat of the season',
+ 'description': 'Nuno Espirito Santo has been sacked as Tottenham boss after only four months in charge.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/q6FkKa4p/poster.jpg?width=720',
+ 'display_id': '25346042',
+ 'timestamp': 1635763157,
+ 'duration': 40.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/johnny-depp-splashes-50k-curry-27160737',
+ 'info_dict': {
+ 'id': 'IT0oa1nH',
+ 'ext': 'mp4',
+ 'title': 'Johnny Depp Leaves The Grand Hotel in Birmingham',
+ 'description': 'Johnny Depp Leaves The Grand Hotel in Birmingham.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/IT0oa1nH/poster.jpg?width=720',
+ 'display_id': '27160737',
+ 'timestamp': 1654524120,
+ 'duration': 65.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-islands-liam-could-first-27162602',
+ 'info_dict': {
+ 'id': 'EaPr5Z2j',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Davide reveals plot twist after receiving text',
+ 'description': 'Love Island: Davide reveals plot twist after receiving text',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EaPr5Z2j/poster.jpg?width=720',
+ 'display_id': '27162602',
+ 'timestamp': 1654552597,
+ 'duration': 23.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/news/uk-news/william-kate-sent-message-george-27160572',
+ 'info_dict': {
+ 'id': 'ygtceXIu',
+ 'ext': 'mp4',
+ 'title': 'Prince William and Kate arrive in Wales with George and Charlotte',
+ 'description': 'Prince William and Kate Middleton arrive in Wales with children Prince George and Princess Charlotte.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/ygtceXIu/poster.jpg?width=720',
+ 'display_id': '27160572',
+ 'timestamp': 1654349678,
+ 'duration': 106.0,
+ 'upload_date': '20220604',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_json(r'div\s+class="json-placeholder"\s+data-json="',
+ webpage, 'data', display_id, transform_source=unescapeHTML)['videoData']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'jwplatform:{data["videoId"]}',
+ 'ie_key': 'JWPlatform',
+ 'display_id': display_id,
+ }
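
_search_json with transform_source=unescapeHTML does the heavy lifting here: the page stores its JSON HTML-escaped inside a data-json attribute. The stdlib equivalent of that transform, on an illustrative attribute value:

    import html
    import json

    attr = '{&quot;videoData&quot;:{&quot;videoId&quot;:&quot;voyyS7SV&quot;}}'
    data = json.loads(html.unescape(attr))
    print(data['videoData']['videoId'])  # voyyS7SV
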
diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py
index 796f268f4..a77d7e682 100644
--- a/yt_dlp/extractor/mixcloud.py
+++ b/yt_dlp/extractor/mixcloud.py
@@ -3,7 +3,6 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
- compat_chr,
compat_ord,
compat_str,
compat_urllib_parse_unquote,
@@ -72,7 +71,7 @@ class MixcloudIE(MixcloudBaseIE):
def _decrypt_xor_cipher(key, ciphertext):
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([
- compat_chr(compat_ord(ch) ^ compat_ord(k))
+ chr(compat_ord(ch) ^ compat_ord(k))
for ch, k in zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
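
The compat_chr -> chr swap touches Mixcloud's XOR helper, which works in both directions because XOR is its own inverse. A standalone version of the same cipher (the key below is a placeholder, not Mixcloud's real one):

    import itertools

    def xor_cipher(key, text):
        # applying the same key twice round-trips the input
        return ''.join(chr(ord(ch) ^ ord(k))
                       for ch, k in zip(text, itertools.cycle(key)))

    secret = xor_cipher('placeholder-key', 'https://example.com/stream')
    print(xor_cipher('placeholder-key', secret))  # https://example.com/stream
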
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index a230d9cdd..c3b063ffe 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -1,13 +1,19 @@
+import itertools
import re
+from urllib.parse import urlparse, parse_qs
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
dict_get,
- ExtractorError,
int_or_none,
+ merge_dicts,
parse_duration,
+ traverse_obj,
+ try_call,
try_get,
+ unified_timestamp,
update_url_query,
)
@@ -247,3 +253,134 @@ class NaverLiveIE(InfoExtractor):
'categories': [meta.get('categoryId')],
'is_live': True
}
+
+
+class NaverNowIE(NaverBaseIE):
+ IE_NAME = 'navernow'
+ _VALID_URL = r'https?://now\.naver\.com/show/(?P<id>[0-9]+)'
+ _PAGE_SIZE = 30
+ _API_URL = 'https://apis.naver.com/now_web/nowcms-api-xhmac/cms/v1'
+ _TESTS = [{
+ 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay=',
+ 'md5': 'e05854162c21c221481de16b2944a0bc',
+ 'info_dict': {
+ 'id': '4759-5901',
+ 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1650369600,
+ 'upload_date': '20220419',
+ 'uploader_id': 'now',
+ 'view_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ }
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=',
+ 'md5': '9f6118e398aa0f22b2152f554ea7851b',
+ 'info_dict': {
+ 'id': '4759-1078',
+ 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20220504',
+ 'timestamp': 1651648042,
+ 'uploader_id': 'now',
+ 'view_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
+ 'url': 'https://now.naver.com/show/4759',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48,
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48,
+ }]
+
+ def _extract_replay(self, show_id, replay_id):
+ vod_info = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}', replay_id)
+ in_key = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}/inkey', replay_id)['inKey']
+ return merge_dicts({
+ 'id': f'{show_id}-{replay_id}',
+ 'title': traverse_obj(vod_info, ('episode', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
+ 'thumbnail': vod_info.get('thumbnail_image_url'),
+ }, self._extract_video_info(replay_id, vod_info['video_id'], in_key))
+
+ def _extract_show_replays(self, show_id):
+ page = 0
+ while True:
+ show_vod_info = self._download_json(
+ f'{self._API_URL}/vod-shows/{show_id}', show_id,
+ query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE},
+ note=f'Downloading JSON vod list for show {show_id} - page {page}'
+ )['response']['result']
+ for v in show_vod_info.get('vod_list') or []:
+ yield self._extract_replay(show_id, v['id'])
+
+ if try_call(lambda: show_vod_info['count'] <= self._PAGE_SIZE * (page + 1)):
+ break
+ page += 1
+
+ def _extract_show_highlights(self, show_id, highlight_id=None):
+ page = 0
+ while True:
+ highlights_videos = self._download_json(
+ f'{self._API_URL}/shows/{show_id}/highlights/videos/', show_id,
+ query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE},
+ note=f'Downloading JSON highlights for show {show_id} - page {page}')
+
+ for highlight in highlights_videos.get('results') or []:
+ if highlight_id and highlight.get('id') != int(highlight_id):
+ continue
+ yield merge_dicts({
+ 'id': f'{show_id}-{highlight["id"]}',
+ 'title': highlight.get('title'),
+ 'timestamp': unified_timestamp(highlight.get('regdate')),
+ 'thumbnail': highlight.get('thumbnail_url'),
+ }, self._extract_video_info(highlight['id'], highlight['video_id'], highlight['video_inkey']))
+
+ if try_call(lambda: highlights_videos['count'] <= self._PAGE_SIZE * (page + 1)):
+ break
+ page += 1
+
+ def _extract_highlight(self, show_id, highlight_id):
+ try:
+ return next(self._extract_show_highlights(show_id, highlight_id))
+ except StopIteration:
+ raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ qs = parse_qs(urlparse(url).query)
+
+ if not self._yes_playlist(show_id, qs.get('shareHightlight')):
+ return self._extract_highlight(show_id, qs['shareHightlight'][0])
+ elif not self._yes_playlist(show_id, qs.get('shareReplayId')):
+ return self._extract_replay(show_id, qs['shareReplayId'][0])
+
+ show_info = self._download_json(
+ f'{self._API_URL}/shows/{show_id}', show_id,
+ note=f'Downloading JSON vod list for show {show_id}')
+
+ return self.playlist_result(
+ itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)),
+ show_id, show_info.get('title'))
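
Both the replay and highlight listings above use the same offset/limit paging, stopping once offset + limit covers the reported total count. The loop in isolation, against a fake API:

    PAGE_SIZE = 30

    def paged(fetch):
        page = 0
        while True:
            result = fetch(offset=page * PAGE_SIZE, limit=PAGE_SIZE)
            yield from result['items']
            if result['count'] <= PAGE_SIZE * (page + 1):
                break
            page += 1

    fake = lambda offset, limit: {
        'items': list(range(offset, min(offset + limit, 70))), 'count': 70}
    print(len(list(paged(fake))))  # 70
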
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index de0142ccf..ad8dbd7a7 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -1,11 +1,15 @@
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
- parse_duration,
+ merge_dicts,
+ parse_iso8601,
qualities,
try_get,
- unified_strdate,
urljoin,
)
@@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = next(group for group in mobj.groups() if group)
- id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
- return self._extract_embed(webpage, display_id, id)
+ return self._extract_embed(webpage, display_id, url)
class NDRIE(NDRBaseIE):
IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{
+ # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
'info_dict': {
'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
'ext': 'mp4',
'title': 'Party, Pötte und Parade',
- 'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
- 'series': None,
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20150508',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431255671,
+ 'upload_date': '20150510',
'duration': 3498,
},
- }, {
- 'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
- 'only_matching': True
- }, {
- 'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
- 'info_dict': {
- 'id': 'kommunalwahl1296',
- 'ext': 'mp4',
- 'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
- 'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
- 'series': 'Hallo Niedersachsen',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20210913',
- 'duration': 438,
+ 'params': {
+ 'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
- 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
'info_dict': {
- 'id': 'sendung1091858',
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
'ext': 'mp4',
- 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
- 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
- 'series': 'extra 3',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20201111',
- 'duration': 1749,
- }
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
}, {
+ # httpAudio, same content id
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
'info_dict': {
'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
'ext': 'mp3',
'title': 'La Valette entgeht der Hinrichtung',
- 'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'upload_date': '20140729',
- 'duration': 884.0,
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1631711863,
+ 'upload_date': '20210915',
+ 'duration': 884,
},
- 'expected_warnings': ['unable to extract json url'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'extra18674',
+ 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20201207',
+ 'timestamp': 1614349457,
+ 'duration': 1749,
+ 'subtitles': {
+ 'de': [{
+ 'ext': 'ttml',
+ 'url': r're:^https://www\.ndr\.de.+',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
- formats = []
- base_url = 'https://www.ndr.de'
- json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
- 'json url', fatal=False)
- if json_url:
- data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
- id, fatal=False)
- info_json = data_json.get('_info', {})
- media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
- for media in media_json:
- if media.get('_quality') == 'auto':
- formats.extend(self._extract_m3u8_formats(media['_stream'], id))
- subtitles = {}
- sub_url = data_json.get('_subtitleUrl')
- if sub_url:
- subtitles.setdefault('de', []).append({
- 'url': base_url + sub_url,
- })
- self._sort_formats(formats)
- return {
- 'id': id,
- 'title': info_json.get('clipTitle'),
- 'thumbnail': base_url + data_json.get('_previewImage'),
- 'description': info_json.get('clipDescription'),
- 'series': info_json.get('seriesTitle') or None,
- 'channel': info_json.get('channelTitle'),
- 'upload_date': unified_strdate(info_json.get('clipDate')),
- 'duration': data_json.get('_duration'),
- 'formats': formats,
- 'subtitles': subtitles,
- }
- else:
- json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
- '_belongsToPodcast-', '')
- data_json = self._download_json(json_url, id, fatal=False)
- return {
- 'id': id,
- 'title': data_json.get('title'),
- 'thumbnail': base_url + data_json.get('poster'),
- 'description': data_json.get('summary'),
- 'upload_date': unified_strdate(data_json.get('publicationDate')),
- 'duration': parse_duration(data_json.get('duration')),
- 'formats': [{
- 'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
- 'vcodec': 'none',
- 'ext': 'mp3',
- }],
- }
+ def _extract_embed(self, webpage, display_id, url):
+ embed_url = (
+ self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None)
+ or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=None)
+ or self._search_regex(
+ r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=''))
+ # some more work needed if we only found sophoraID
+ if re.match(r'^[a-z]+\d+$', embed_url):
+ # get the initial part of the URL path, e.g. /panorama/archiv/2022/
+ parsed_url = compat_urllib_parse_urlparse(url)
+ path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+ # find tell-tale image with the actual ID
+ ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+ # or fall back to the known NDR info player URL pattern
+ NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
+ embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
+ if not embed_url:
+ raise ExtractorError('Unable to extract embedUrl')
+
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
+ r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
+ webpage, 'upload date', group='cont', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
class NJoyIE(NDRBaseIE):
@@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideo, different content id
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
'md5': '417660fffa90e6df2fda19f1b40a64d8',
'info_dict': {
- 'id': 'dockville882',
+ 'id': 'livestream283',
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
- 'ext': 'mp4',
- 'title': '"Ich hab noch nie" mit Felix Jaehn',
- 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+ 'ext': 'mp3',
+ 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
+ 'description': 'md5:681698f527b8601e511e7b79edde7d2c',
'uploader': 'njoy',
- 'upload_date': '20150822',
- 'duration': 211,
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
@@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE):
'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
+ def _extract_embed(self, webpage, display_id, url=None):
+ # find tell-tale URL with the actual ID, or ...
video_id = self._search_regex(
- r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
- description = self._search_regex(
- r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
- webpage, 'description', fatal=False)
+ (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+ r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+ webpage, 'NDR id', default=None)
+
+ description = (
+ self._html_search_meta('description', webpage)
+ or self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False))
return {
'_type': 'url_transparent',
'ie_key': 'NDREmbedBase',
'url': 'ndr:%s' % video_id,
'display_id': display_id,
'description': description,
+ 'title': display_id.replace('-', ' ').strip(),
}
@@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
_TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'upload_date': '20150907',
'duration': 132,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
'md5': '002085c44bae38802d94ae5802a36e78',
@@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/info/audio51535-player.html',
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'title': 'La Valette entgeht der Hinrichtung',
'is_live': False,
'uploader': 'ndrinfo',
- 'upload_date': '20140729',
+ 'upload_date': '20210915',
'duration': 884,
},
'params': {
@@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideoLive
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
'info_dict': {
'id': 'livestream217',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
- 'upload_date': '20150910',
+ 'upload_date': '20210409',
+ 'uploader': 'ndrtv',
},
'params': {
'skip_download': True,
@@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'ext': 'mp4',
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
'is_live': False,
- 'upload_date': '20150807',
+ 'upload_date': '20200826',
'duration': 1011,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# httpAudio
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpAudioLive, no explicit ext
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'uploader': 'njoy',
- 'upload_date': '20150810',
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
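
The trickiest path in the new NDR _extract_embed is the sophoraID fallback: when only a bare content id is found, the page path leading up to the display id is used to hunt for the real player id in the HTML, and failing that a known info-player URL is synthesized. The branch in isolation (inputs are illustrative):

    import re

    NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'

    def resolve_embed(candidate, path, webpage):
        if not re.match(r'^[a-z]+\d+$', candidate):
            return candidate  # already a full embed URL
        mobj = re.search(r'%s([a-z]+\d+)(?!\.)\b' % re.escape(path), webpage)
        return 'ndr:%s' % mobj.group(1) if mobj else NDR_INFO_URL_TPL % candidate

    page = '... src="/info/audio51535-player.html" ...'
    print(resolve_embed('audio51535', '/info/', page))  # ndr:audio51535
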
diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py
index fbb033169..bfe52f77d 100644
--- a/yt_dlp/extractor/ndtv.py
+++ b/yt_dlp/extractor/ndtv.py
@@ -1,13 +1,7 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote_plus
-)
-from ..utils import (
- parse_duration,
- remove_end,
- unified_strdate,
- urljoin
-)
+from ..utils import parse_duration, remove_end, unified_strdate, urljoin
class NDTVIE(InfoExtractor):
@@ -80,7 +74,7 @@ class NDTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
# '__title' does not contain extra words such as sub-site name, "Video" etc.
- title = compat_urllib_parse_unquote_plus(
+ title = urllib.parse.unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
or self._og_search_title(webpage))
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index ff9a2adf0..7057b8b26 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -1,14 +1,11 @@
import itertools
import json
import time
-import urllib
+import urllib.error
+import urllib.parse
-from ..utils import (
- ExtractorError,
- parse_iso8601,
- try_get,
-)
from .common import InfoExtractor
+from ..utils import ExtractorError, parse_iso8601, try_get
class NebulaBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index 4def7e76b..f9a67876a 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -1,18 +1,12 @@
-from hashlib import md5
+import itertools
+import re
from base64 import b64encode
from datetime import datetime
-import re
+from hashlib import md5
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
- compat_itertools_count,
-)
-from ..utils import (
- sanitized_Request,
- float_or_none,
-)
+from ..compat import compat_str, compat_urllib_parse_urlencode
+from ..utils import float_or_none, sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor):
@@ -449,7 +443,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
name = None
desc = None
entries = []
- for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+ for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
% (self._PAGE_SIZE, dj_id, offset),
diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py
new file mode 100644
index 000000000..f529682a3
--- /dev/null
+++ b/yt_dlp/extractor/netverse.py
@@ -0,0 +1,176 @@
+import functools
+
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from ..utils import (
+ InAdvancePagedList,
+ smuggle_url,
+ traverse_obj,
+)
+
+
+class NetverseBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'watch': 'watchvideo',
+ 'video': 'watchvideo',
+ 'webseries': 'webseries',
+ }
+
+ def _call_api(self, url, query={}):
+ display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+
+ json_data = self._download_json(
+ f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[sites_type]}/{display_id}',
+ display_id, query=query)
+
+ return display_id, json_data
+
+
+class NetverseIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # Watch video
+ 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'info_dict': {
+ 'id': 'k4yhqUwINAGtmHx3NkL',
+ 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
+ 'ext': 'mp4',
+ 'season': 'Season 2016',
+ 'description': 'md5:fc27747c0aa85067b6967c816f01617c',
+ 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfce45c827.jpeg',
+ 'episode_number': 22,
+ 'series': 'Waktu Indonesia Bercanda',
+ 'episode': 'Episode 22',
+ 'uploader_id': 'x2ir3vq',
+ 'age_limit': 0,
+ 'tags': [],
+ 'view_count': int,
+ 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'duration': 2990,
+ 'upload_date': '20210722',
+ 'timestamp': 1626919804,
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ }
+ }, {
+ # series
+ 'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
+ 'info_dict': {
+ 'id': 'x88izwc',
+ 'title': 'Jadoo Seorang Model',
+ 'ext': 'mp4',
+ 'season': 'Season 2',
+ 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99',
+ 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg',
+ 'episode_number': 2,
+ 'series': 'Hello Jadoo',
+ 'episode': 'Episode 2',
+ 'view_count': int,
+ 'like_count': int,
+ 'display_id': 'jadoo-seorang-model',
+ 'uploader_id': 'x2ir3vq',
+ 'duration': 635,
+ 'timestamp': 1646372927,
+ 'tags': ['PG069497-hellojadooseason2eps2'],
+ 'upload_date': '20220304',
+ 'uploader': 'Net Prime',
+ 'age_limit': 0,
+ },
+ 'skip': 'Video is geo-blocked in some countries'
+ }, {
+ # non-www host
+ 'url': 'https://netverse.id/watch/tetangga-baru',
+ 'info_dict': {
+ 'id': 'k4CNGz7V0HJ7vfwZbXy',
+ 'ext': 'mp4',
+ 'title': 'Tetangga Baru',
+ 'season': 'Season 1',
+ 'description': 'md5:ed6dd355bed84d139b1154c3d8d65957',
+ 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfd9d32c5f.jpeg',
+ 'episode_number': 1,
+ 'series': 'Tetangga Masa Gitu',
+ 'episode': 'Episode 1',
+ 'timestamp': 1624538169,
+ 'view_count': int,
+ 'upload_date': '20210624',
+ 'age_limit': 0,
+ 'uploader_id': 'x2ir3vq',
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ 'tags': ['PG008534', 'tetangga', 'Baru'],
+ 'display_id': 'tetangga-baru',
+ 'duration': 1406,
+ },
+ }, {
+ # /video url
+ 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
+ 'title': 'Namaku Choi Jadoo',
+ 'info_dict': {
+ 'id': 'x887jzz',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg',
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99',
+ 'title': 'Namaku Choi Jadoo',
+ 'series': 'Hello Jadoo',
+ 'episode': 'Episode 1',
+ 'age_limit': 0,
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
+ 'duration': 780,
+ 'display_id': 'pg067482-hellojadoo-season1',
+ 'uploader_id': 'x2ir3vq',
+ 'uploader': 'Net Prime',
+ 'timestamp': 1645764984,
+ 'upload_date': '20220225',
+ },
+ 'skip': 'This video is geo-blocked in some countries'
+ }]
+
+ def _real_extract(self, url):
+ display_id, program_json = self._call_api(url)
+ videos = program_json['response']['videos']
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
+ 'display_id': display_id,
+ 'title': videos.get('title'),
+ 'season': videos.get('season_name'),
+ 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
+ 'description': traverse_obj(videos, ('program_detail', 'description')),
+ 'episode_number': videos.get('episode_order'),
+ 'series': traverse_obj(videos, ('program_detail', 'title')),
+ }
+
+
+class NetversePlaylistIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
+ 'info_dict': {
+ 'id': 'tetangga-masa-gitu',
+ 'title': 'Tetangga Masa Gitu',
+ },
+ 'playlist_count': 46,
+ }
+
+ def parse_playlist(self, url, page_num):
+ _, playlist_json = self._call_api(url, query={'page': page_num + 1})
+ for slug in traverse_obj(playlist_json, ('response', 'related', 'data', ..., 'slug')):
+ yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
+
+ def _real_extract(self, url):
+ _, playlist_data = self._call_api(url)
+ webseries_related_info = playlist_data['response']['related']
+ # TODO: get videos from other seasons
+ # Each season has an id, and the next season's videos live at api_url/<season_id>?page=<page>
+ return self.playlist_result(
+ InAdvancePagedList(functools.partial(self.parse_playlist, url),
+ webseries_related_info['last_page'],
+ webseries_related_info['to'] - webseries_related_info['from'] + 1),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
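Review note (netverse.py): NetversePlaylistIE hands pagination off to yt-dlp's InAdvancePagedList helper, which takes a page fetcher, the total page count, and the page size, and only fetches pages as entries are consumed. A minimal sketch of the pattern (ITEMS and the page size are illustrative stand-ins, not part of this diff):

    import functools

    from yt_dlp.utils import InAdvancePagedList

    PAGE_SIZE = 3
    ITEMS = list(range(10))  # stand-in for the playlist entries

    def fetch_page(items, page_num):
        # page_num is 0-based; called lazily, once per requested page
        start = page_num * PAGE_SIZE
        yield from items[start:start + PAGE_SIZE]

    page_count = -(-len(ITEMS) // PAGE_SIZE)  # ceiling division
    paged = InAdvancePagedList(functools.partial(fetch_page, ITEMS), page_count, PAGE_SIZE)
    print(list(paged))  # [0, 1, ..., 9], fetched page by page

The extractor derives the page count from the API's last_page field and the page size from to - from + 1, matching this shape.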
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index cf2ec7b79..60d76d1b1 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -11,7 +11,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor):
- _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+ _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
_TYPE_REGEX = r'/(?P<type>video|audio)/'
@@ -27,7 +27,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
- if episode_id.isdigit():
+ if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video'
@@ -89,7 +89,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
- _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+ # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], e.g. 9999a34
+ _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@@ -129,6 +130,19 @@ class NhkVodIE(NhkBaseIE):
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True,
+ }, {
+ # video, alphabetic character in ID #29670
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': 'qfjay6cg',
+ 'ext': 'mp4',
+ 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
+ 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
+ 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
+ 'upload_date': '20210615',
+ 'timestamp': 1623722008,
+ }
}]
def _real_extract(self, url):
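Review note (nhk.py): the isdigit() check is replaced with a length check so that 7-character IDs containing letters are normalized the same way as all-digit ones:

    episode_id = '9999a34'  # ID from the new test above
    if len(episode_id) == 7:
        episode_id = episode_id[:4] + '-' + episode_id[4:]
    assert episode_id == '9999-a34'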
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index a80b544f8..82fb27631 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -647,14 +647,14 @@ class NiconicoSeriesIE(InfoExtractor):
'id': '110226',
'title': 'ご立派ァ!のシリーズ',
},
- 'playlist_mincount': 10, # as of 2021/03/17
+ 'playlist_mincount': 10,
}, {
'url': 'https://www.nicovideo.jp/series/12312/',
'info_dict': {
'id': '12312',
'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
},
- 'playlist_mincount': 97, # as of 2021/03/17
+ 'playlist_mincount': 103,
}, {
'url': 'https://nico.ms/series/203559',
'only_matching': True,
@@ -672,7 +672,7 @@ class NiconicoSeriesIE(InfoExtractor):
title = unescapeHTML(title)
playlist = [
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
- for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+ for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
return self.playlist_result(playlist, list_id, title)
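Review note (niconico.py): the old pattern required an href and a data-href with identical IDs back to back; the new one keys on data-href alone and tolerates absolute URLs. Both of these snippets now match (illustrative HTML):

    import re

    pattern = r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)'
    for snippet in ('data-href="/watch/sm123"',
                    'data-href="https://www.nicovideo.jp/watch/sm123"'):
        print(re.findall(pattern, snippet))  # ['sm123'] in both cases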
diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py
index 6d93f154c..e677e862d 100644
--- a/yt_dlp/extractor/npr.py
+++ b/yt_dlp/extractor/npr.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- qualities,
- url_or_none,
-)
+from ..utils import int_or_none, qualities, traverse_obj, url_or_none
class NprIE(InfoExtractor):
@@ -51,6 +47,15 @@ class NprIE(InfoExtractor):
# multimedia, no formats, stream
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
+ 'info_dict': {
+ 'id': '1086468851',
+ 'ext': 'mp4',
+ 'title': 'Bonobo: Tiny Desk (Home) Concert',
+ 'duration': 1061,
+ 'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
+ },
}]
def _real_extract(self, url):
@@ -110,6 +115,12 @@ class NprIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
stream_url, stream_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
+
+ if not formats:
+ raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
+ m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+
self._sort_formats(formats)
entries.append({
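Review note (npr.py): the new fallback walks every JSON-LD object on the page and takes the first subjectOf[].embedUrl it finds; in traverse_obj, `...` branches over all indices at that level and get_all=False returns the first hit. A toy illustration (the data is made up):

    from yt_dlp.utils import traverse_obj

    raw_json_ld = [
        {'@type': 'NewsArticle'},
        {'subjectOf': [{'embedUrl': 'https://example.com/stream.m3u8'}]},
    ]
    print(traverse_obj(raw_json_ld, (..., 'subjectOf', ..., 'embedUrl'), get_all=False))
    # -> 'https://example.com/stream.m3u8'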
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 553c55132..fcbafe418 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -3,18 +3,17 @@ import random
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
- compat_HTTPError,
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
try_get,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
index 61e3a8b86..79dad09e3 100644
--- a/yt_dlp/extractor/openload.py
+++ b/yt_dlp/extractor/openload.py
@@ -9,7 +9,6 @@ from ..utils import (
ExtractorError,
Popen,
check_executable,
- encodeArgument,
get_exe_version,
is_outdated_version,
)
@@ -132,7 +131,7 @@ class PhantomJSwrapper:
os.remove(self._TMP_FILES[name].name)
def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ cookies = cookie_jar_to_list(self.extractor.cookiejar)
for cookie in cookies:
if 'path' not in cookie:
cookie['path'] = '/'
@@ -213,16 +212,14 @@ class PhantomJSwrapper:
else:
self.extractor.to_screen(f'{video_id}: {note2}')
- p = Popen(
+ stdout, stderr, returncode = Popen.run(
[self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate_or_kill()
- if p.returncode != 0:
- raise ExtractorError(
- 'Executing JS failed\n:' + encodeArgument(err))
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode:
+ raise ExtractorError(f'Executing JS failed:\n{stderr}')
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
- return (html, encodeArgument(out))
+ return html, stdout
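Review note (openload.py): Popen.run collapses the old construct/communicate_or_kill/returncode dance into a single call returning (stdout, stderr, returncode), which is what lets the error path shrink to one if. Shape of the new helper, using the Python interpreter itself as a safe stand-in command:

    import subprocess
    import sys

    from yt_dlp.utils import Popen

    stdout, stderr, returncode = Popen.run(
        [sys.executable, '--version'], text=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if returncode:
        raise RuntimeError(f'Executing command failed:\n{stderr}')
    print(stdout.strip())  # e.g. 'Python 3.10.4'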
diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py
index 8e50ffc7f..3fc05d1f2 100644
--- a/yt_dlp/extractor/peloton.py
+++ b/yt_dlp/extractor/peloton.py
@@ -1,11 +1,9 @@
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
float_or_none,
@@ -125,7 +123,7 @@ class PelotonIE(InfoExtractor):
is_live = False
if ride_data.get('content_format') == 'audio':
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token))
formats = [{
'url': url,
'ext': 'm4a',
@@ -138,9 +136,9 @@ class PelotonIE(InfoExtractor):
url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]),
ride_data['vod_stream_url'],
- compat_urllib_parse.quote(compat_urllib_parse.quote(token)))
+ urllib.parse.quote(urllib.parse.quote(token)))
elif ride_data.get('live_stream_url'):
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token))
is_live = True
else:
raise ExtractorError('Missing video URL')
diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py
new file mode 100644
index 000000000..a635ac92f
--- /dev/null
+++ b/yt_dlp/extractor/playsuisse.py
@@ -0,0 +1,147 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PlaySuisseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/watch/(?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.playsuisse.ch/watch/763211/0',
+ 'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
+ 'info_dict': {
+ 'id': '763211',
+ 'ext': 'mp4',
+ 'title': 'Knochen',
+ 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
+ 'duration': 3344,
+ 'series': 'Wilder',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Knochen',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:9260abe0c0ec9b69914d0a10d54c5878'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/808675/0',
+ 'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
+ 'info_dict': {
+ 'id': '808675',
+ 'ext': 'mp4',
+ 'title': 'Der Läufer',
+ 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
+ 'duration': 5280,
+ 'episode': 'Der Läufer',
+ 'thumbnail': 'md5:44af7d65ee02bbba4576b131868bb783'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/817193/0',
+ 'md5': '1d6c066f92cd7fffd8b28a53526d6b59',
+ 'info_dict': {
+ 'id': '817193',
+ 'ext': 'mp4',
+ 'title': 'Die Einweihungsparty',
+ 'description': 'md5:91ebf04d3a42cb3ab70666acf750a930',
+ 'duration': 1380,
+ 'series': 'Nr. 47',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Die Einweihungsparty',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:637585fb106e3a4bcd991958924c7e44'
+ }
+ }
+ ]
+
+ _GRAPHQL_QUERY = '''
+ query AssetWatch($assetId: ID!) {
+ assetV2(id: $assetId) {
+ ...Asset
+ episodes {
+ ...Asset
+ }
+ }
+ }
+ fragment Asset on AssetV2 {
+ id
+ name
+ description
+ duration
+ episodeNumber
+ seasonNumber
+ seriesName
+ medias {
+ type
+ url
+ }
+ thumbnail16x9 {
+ ...ImageDetails
+ }
+ thumbnail2x3 {
+ ...ImageDetails
+ }
+ thumbnail16x9WithTitle {
+ ...ImageDetails
+ }
+ thumbnail2x3WithTitle {
+ ...ImageDetails
+ }
+ }
+ fragment ImageDetails on AssetImage {
+ id
+ url
+ }'''
+
+ def _get_media_data(self, media_id):
+ # NOTE: in the web app, the "locale" header is used to switch between languages;
+ # however, it doesn't seem to take effect when passed here.
+ response = self._download_json(
+ 'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
+ media_id, data=json.dumps({
+ 'operationName': 'AssetWatch',
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'assetId': media_id}
+ }).encode('utf-8'),
+ headers={'Content-Type': 'application/json', 'locale': 'de'})
+
+ return response['data']['assetV2']
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ media_data = self._get_media_data(media_id)
+ info = self._extract_single(media_data)
+ if media_data.get('episodes'):
+ info.update({
+ '_type': 'playlist',
+ 'entries': map(self._extract_single, media_data['episodes']),
+ })
+ return info
+
+ def _extract_single(self, media_data):
+ thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
+
+ formats, subtitles = [], {}
+ for media in traverse_obj(media_data, 'medias', default=[]):
+ if not media.get('url') or media.get('type') != 'HLS':
+ continue
+ f, subs = self._extract_m3u8_formats_and_subtitles(
+ media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
+ formats.extend(f)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_data['id'],
+ 'title': media_data.get('name'),
+ 'description': media_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(media_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': media_data.get('seriesName'),
+ 'season_number': int_or_none(media_data.get('seasonNumber')),
+ 'episode': media_data.get('name'),
+ 'episode_number': int_or_none(media_data.get('episodeNumber')),
+ }
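Review note (playsuisse.py): passing a callable to traverse_obj filters dict items by predicate, so all four thumbnail* variants returned by the GraphQL fragment are collected without naming each key. Toy version (data shape per the fragment above):

    from yt_dlp.utils import traverse_obj

    media_data = {
        'id': '763211',
        'thumbnail16x9': {'id': 't1', 'url': 'https://example.com/a.jpg'},
        'thumbnail2x3': {'id': 't2', 'url': 'https://example.com/b.jpg'},
    }
    thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
    print(thumbnails)
    # -> [{'id': 't1', 'url': ...}, {'id': 't2', 'url': ...}]

Each matched value is an AssetImage dict with a url key, which is the shape the thumbnails field expects.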
diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py
index 5ffefc934..18aeda7de 100644
--- a/yt_dlp/extractor/playvid.py
+++ b/yt_dlp/extractor/playvid.py
@@ -1,14 +1,9 @@
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError, clean_html
class PlayvidIE(InfoExtractor):
@@ -62,7 +57,7 @@ class PlayvidIE(InfoExtractor):
val = videovars_match.group(2)
if key == 'title':
- video_title = compat_urllib_parse_unquote_plus(val)
+ video_title = urllib.parse.unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
index eef0d02ca..0911893d4 100644
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@@ -1,5 +1,3 @@
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -136,42 +134,3 @@ class PokemonWatchIE(InfoExtractor):
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})
-
-
-class PokemonSoundLibraryIE(InfoExtractor):
- _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp'
-
- _TESTS = [{
- 'url': 'https://soundlibrary.pokemon.co.jp/',
- 'info_dict': {
- 'title': 'Pokémon Diamond and Pearl Sound Tracks',
- },
- 'playlist_mincount': 149,
- }]
-
- def _real_extract(self, url):
- musicbox_webpage = self._download_webpage(
- 'https://soundlibrary.pokemon.co.jp/musicbox', None,
- 'Downloading list of songs')
- song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)]
- song_titles = song_titles[4::2]
-
- # each songs don't have permalink; instead we return all songs at once
- song_entries = [{
- 'id': f'pokemon-soundlibrary-{song_id}',
- 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
- # note: the server always serves MP3 files, despite its extension of the URL above
- 'ext': 'mp3',
- 'acodec': 'mp3',
- 'vcodec': 'none',
- 'title': song_title,
- 'track': song_title,
- 'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
- 'uploader': 'Pokémon',
- 'release_year': 2006,
- 'release_date': '20060928',
- 'track_number': song_id,
- 'album': 'Pokémon Diamond and Pearl',
- } for song_id, song_title in enumerate(song_titles, 1)]
-
- return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')
diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py
index ed741a07b..ddc5ec8c8 100644
--- a/yt_dlp/extractor/popcorntimes.py
+++ b/yt_dlp/extractor/popcorntimes.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_chr,
-)
+from ..compat import compat_b64decode
from ..utils import int_or_none
@@ -50,7 +47,7 @@ class PopcorntimesIE(InfoExtractor):
c_ord += 13
if upper < c_ord:
c_ord -= 26
- loc_b64 += compat_chr(c_ord)
+ loc_b64 += chr(c_ord)
video_url = compat_b64decode(loc_b64).decode('utf-8')
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index d296ccacb..35468b4fc 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -3,28 +3,26 @@ import itertools
import math
import operator
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
format_field,
int_or_none,
merge_dicts,
- NO_DEFAULT,
orderedSet,
remove_quotes,
+ remove_start,
str_to_int,
update_url_query,
- urlencode_postdata,
url_or_none,
+ urlencode_postdata,
)
@@ -49,7 +47,7 @@ class PornHubBaseIE(InfoExtractor):
r'document\.location\.reload\(true\)')):
url_or_request = args[0]
url = (url_or_request.get_full_url()
- if isinstance(url_or_request, compat_urllib_request.Request)
+ if isinstance(url_or_request, urllib.request.Request)
else url_or_request)
phantom = PhantomJSwrapper(self, required_version='2.0')
phantom.get(url, html=webpage)
@@ -199,6 +197,16 @@ class PornHubIE(PornHubBaseIE):
},
'skip': 'This video has been disabled',
}, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+ 'info_dict': {
+ 'id': 'ph601dc30bae19a',
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': 'projekt-melody',
+ 'upload_date': '20210205',
+ 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+ 'thumbnail': r're:https?://.+',
+ },
+ }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
}, {
@@ -429,7 +437,7 @@ class PornHubIE(PornHubBaseIE):
default=None))
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
})
@@ -457,9 +465,11 @@ class PornHubIE(PornHubBaseIE):
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
+ model_profile = self._search_json(
+ r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', default=None)
+ webpage, 'uploader', default=None) or model_profile.get('username')
def extract_vote_count(kind, name):
return self._extract_count(
@@ -488,6 +498,7 @@ class PornHubIE(PornHubBaseIE):
return merge_dicts({
'id': video_id,
'uploader': video_uploader,
+ 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
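Review note (pornhub.py): two helpers do the heavy lifting here. format_field now takes (value, getter, template) positionally instead of the removed template= keyword, and remove_start strips the '/model/' prefix to derive uploader_id (values taken from the new test above):

    from yt_dlp.utils import format_field, remove_start

    print(format_field(1080, None, '%dp'))  # '1080p'
    print(format_field(None, None, '%dp'))  # '' when the value is missing

    print(remove_start('/model/projekt-melody', '/model/'))  # 'projekt-melody'

remove_start also passes None through unchanged, so a missing modelProfileLink does not crash the lookup.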
diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py
new file mode 100644
index 000000000..67d41fdfd
--- /dev/null
+++ b/yt_dlp/extractor/premiershiprugby.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PremiershipRugbyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?premiershiprugby\.com/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons',
+ 'info_dict': {
+ 'id': '0_mbkb7ldt',
+ 'title': 'Full Match: Harlequins v Newcastle Falcons',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75',
+ 'duration': 6093.0,
+ 'tags': ['video'],
+ 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}',
+ display_id, query={'clientId': 'PRL'})['data']['article']
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ json_data['heroMedia']['content']['videoLink'], display_id)
+
+ return {
+ 'id': json_data['heroMedia']['content']['sourceSystemId'],
+ 'display_id': display_id,
+ 'title': traverse_obj(json_data, ('heroMedia', 'title')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')),
+ 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000),
+ 'tags': json_data.get('tags'),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'text')),
+ }
diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py
index 3c13d1f56..38c5d1109 100644
--- a/yt_dlp/extractor/puls4.py
+++ b/yt_dlp/extractor/puls4.py
@@ -1,9 +1,6 @@
from .prosiebensat1 import ProSiebenSat1BaseIE
-from ..utils import (
- unified_strdate,
- parse_duration,
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index dbb748715..498cc6be9 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -43,7 +43,7 @@ class RadikoBaseIE(InfoExtractor):
}).split(',')[0]
auth_data = (auth_token, area_id)
- self._downloader.cache.store('radiko', 'auth_data', auth_data)
+ self.cache.store('radiko', 'auth_data', auth_data)
return auth_data
def _extract_full_key(self):
@@ -150,7 +150,7 @@ class RadikoIE(RadikoBaseIE):
vid_int = unified_timestamp(video_id, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
- auth_cache = self._downloader.cache.load('radiko', 'auth_data')
+ auth_cache = self.cache.load('radiko', 'auth_data')
for attempt in range(2):
auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
formats = self._extract_formats(
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 8fef54dab..7b60b2617 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -1,6 +1,7 @@
import re
from .common import InfoExtractor
+from ..utils import parse_duration, unified_strdate
class RadioFranceIE(InfoExtractor):
@@ -54,3 +55,51 @@ class RadioFranceIE(InfoExtractor):
'description': description,
'uploader': uploader,
}
+
+
+class FranceCultureIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
+ _TESTS = [
+ {
+ 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
+ 'info_dict': {
+ 'id': '8440487',
+ 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
+ 'ext': 'mp3',
+ 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
+ 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
+ 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
+ 'upload_date': '20220514',
+ 'duration': 2750,
+ },
+ },
+ {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
+ video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_data['contentUrl'],
+ 'ext': video_data.get('encodingFormat'),
+ 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=self._og_search_title(webpage)),
+ 'description': self._html_search_regex(
+ r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
+ }
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index dc9897305..d89c9563b 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -80,7 +80,7 @@ class RadLiveIE(InfoExtractor):
'release_timestamp': release_date,
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'),
}
if content_type == 'episode':
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index ad53d697e..119c5ea3c 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -146,7 +146,7 @@ class RokfinIE(InfoExtractor):
for page_n in itertools.count():
raw_comments = self._download_json(
f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
- video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
+ video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
fatal=False) or {}
for comment in raw_comments.get('content') or []:
@@ -318,7 +318,7 @@ class RokfinChannelIE(RokfinPlaylistBaseIE):
data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
metadata = self._download_json(
data_url, channel_name,
- note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
+ note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')
yield from self._get_video_data(metadata)
pages_total = int_or_none(metadata.get('totalPages')) or None
@@ -360,7 +360,7 @@ class RokfinSearchIE(SearchInfoExtractor):
_db_access_key = None
def _real_initialize(self):
- self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None))
+ self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None))
if not self._db_url:
self._get_db_access_credentials()
@@ -369,7 +369,7 @@ class RokfinSearchIE(SearchInfoExtractor):
for page_number in itertools.count(1):
search_results = self._run_search_query(
query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
- note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}')
+ note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
for result in search_results.get('results') or []:
@@ -405,6 +405,6 @@ class RokfinSearchIE(SearchInfoExtractor):
self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
- self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+ self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
return
raise ExtractorError('Unable to extract access credentials')
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 42a602968..798dde7fa 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -1,14 +1,12 @@
import base64
import io
+import struct
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_struct_unpack,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
qualities,
remove_end,
@@ -73,7 +71,7 @@ class RTVEALaCartaIE(InfoExtractor):
def _decrypt_url(png):
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
while True:
- length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+ length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 50c383d79..924f9829f 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -24,6 +24,11 @@ class RumbleEmbedIE(InfoExtractor):
'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
'timestamp': 1571611968,
'upload_date': '20191020',
+ 'channel_url': 'https://rumble.com/c/WMAR',
+ 'channel': 'WMAR',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
+ 'duration': 234,
+ 'uploader': 'WMAR',
}
}, {
'url': 'https://rumble.com/embed/vslb7v',
@@ -38,19 +43,21 @@ class RumbleEmbedIE(InfoExtractor):
'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
'duration': 901,
+ 'uploader': 'CTNews',
}
}, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
- webpage)]
+ @classmethod
+ def _extract_urls(cls, webpage):
+ embeds = tuple(re.finditer(
+ fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{cls._VALID_URL})', webpage))
+ if embeds:
+ return [mobj.group('url') for mobj in embeds]
+ return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
+ r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -77,17 +84,26 @@ class RumbleEmbedIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
+ subtitles = {
+ lang: [{
+ 'url': sub_info['path'],
+ 'name': sub_info.get('language') or '',
+ }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
+ }
+
author = video.get('author') or {}
return {
'id': video_id,
'title': title,
'formats': formats,
+ 'subtitles': subtitles,
'thumbnail': video.get('i'),
'timestamp': parse_iso8601(video.get('pubDate')),
'channel': author.get('name'),
'channel_url': author.get('url'),
'duration': int_or_none(video.get('duration')),
+ 'uploader': author.get('name'),
}
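Review note (rumble.py): when no iframe/script src embed is present, the new fallback synthesizes embed URLs from Rumble("play", {...}) player boot calls. For example (illustrative page snippet):

    import re

    webpage = '<script>Rumble("play", {"video":"v5pv5f","div":"rumble_v5pv5f"});</script>'
    urls = [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
        r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
    print(urls)  # ['https://rumble.com/embed/v5pv5f']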
diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py
index e3dbaab69..df5e79bef 100644
--- a/yt_dlp/extractor/screencast.py
+++ b/yt_dlp/extractor/screencast.py
@@ -1,11 +1,8 @@
+import urllib.request
+
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
-)
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
class ScreencastIE(InfoExtractor):
@@ -75,7 +72,7 @@ class ScreencastIE(InfoExtractor):
flash_vars_s = flash_vars_s.replace(',', '&')
if flash_vars_s:
flash_vars = compat_parse_qs(flash_vars_s)
- video_url_raw = compat_urllib_request.quote(
+ video_url_raw = urllib.request.quote(
flash_vars['content'][0])
video_url = video_url_raw.replace('http%3A', 'http:')
diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py
index 5bc097b0d..9a237b320 100644
--- a/yt_dlp/extractor/shared.py
+++ b/yt_dlp/extractor/shared.py
@@ -1,14 +1,13 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_urllib_parse_unquote_plus,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
+ KNOWN_EXTENSIONS,
ExtractorError,
+ determine_ext,
int_or_none,
js_to_json,
- KNOWN_EXTENSIONS,
parse_filesize,
rot47,
url_or_none,
@@ -130,7 +129,7 @@ class VivoIE(SharedBaseIE):
return stream_url
def decode_url(encoded_url):
- return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+ return rot47(urllib.parse.unquote_plus(encoded_url))
return decode_url(self._parse_json(
self._search_regex(
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 6dfa50c60..9e4c8cf25 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -67,7 +67,7 @@ class SoundcloudBaseIE(InfoExtractor):
_HEADERS = {}
def _store_client_id(self, client_id):
- self._downloader.cache.store('soundcloud', 'client_id', client_id)
+ self.cache.store('soundcloud', 'client_id', client_id)
def _update_client_id(self):
webpage = self._download_webpage('https://soundcloud.com/', None)
@@ -104,7 +104,7 @@ class SoundcloudBaseIE(InfoExtractor):
raise
def _initialize_pre_login(self):
- self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
def _perform_login(self, username, password):
if username != 'oauth':
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index 855f1d6d3..7381ac362 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -109,6 +109,49 @@ class SouthParkDeIE(SouthParkIE):
return
+class SouthParkLatIE(SouthParkIE):
+ IE_NAME = 'southpark.lat'
+ _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/en/collections/29ve08/south-park-heating-up/lydbrc',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'info_dict': {
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
+ },
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}',
+ video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
+
class SouthParkNlIE(SouthParkIE):
IE_NAME = 'southpark.nl'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py
index a2068a1b6..fef8d8dd2 100644
--- a/yt_dlp/extractor/spotify.py
+++ b/yt_dlp/extractor/spotify.py
@@ -1,12 +1,15 @@
+import functools
import json
import re
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
+ traverse_obj,
try_get,
unified_strdate,
)
@@ -25,7 +28,7 @@ class SpotifyBaseIE(InfoExtractor):
self._ACCESS_TOKEN = self._download_json(
'https://open.spotify.com/get_access_token', None)['accessToken']
- def _call_api(self, operation, video_id, variables):
+ def _call_api(self, operation, video_id, variables, **kwargs):
return self._download_json(
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
'operationName': 'query' + operation,
@@ -35,7 +38,8 @@ class SpotifyBaseIE(InfoExtractor):
'sha256Hash': self._OPERATION_HASHES[operation],
},
})
- }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
+ **kwargs)['data']
def _extract_episode(self, episode, series):
episode_id = episode['id']
@@ -143,22 +147,25 @@ class SpotifyShowIE(SpotifyBaseIE):
},
'playlist_mincount': 36,
}
+ _PER_PAGE = 100
+
+ def _fetch_page(self, show_id, page=0):
+ return self._call_api('ShowEpisodes', show_id, {
+ 'limit': 100,
+ 'offset': page * self._PER_PAGE,
+ 'uri': f'spotify:show:{show_id}',
+ }, note=f'Downloading page {page + 1} JSON metadata')['podcast']
def _real_extract(self, url):
show_id = self._match_id(url)
- podcast = self._call_api('ShowEpisodes', show_id, {
- 'limit': 1000000000,
- 'offset': 0,
- 'uri': 'spotify:show:' + show_id,
- })['podcast']
- podcast_name = podcast.get('name')
-
- entries = []
- for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
- episode = item.get('episode')
- if not episode:
- continue
- entries.append(self._extract_episode(episode, podcast_name))
+ first_page = self._fetch_page(show_id)
+
+ def _entries(page):
+ podcast = self._fetch_page(show_id, page) if page else first_page
+ yield from map(
+ functools.partial(self._extract_episode, series=podcast.get('name')),
+ traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))
return self.playlist_result(
- entries, show_id, podcast_name, podcast.get('description'))
+ OnDemandPagedList(_entries, self._PER_PAGE),
+ show_id, first_page.get('name'), first_page.get('description'))
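Review note (spotify.py): instead of requesting a billion episodes in one call, the extractor now declares a page size and lets OnDemandPagedList drive the fetcher; unlike InAdvancePagedList it needs no upfront page count and stops once a page comes back short. Minimal shape (EPISODES is an illustrative stand-in for API results):

    from yt_dlp.utils import OnDemandPagedList

    PER_PAGE = 2
    EPISODES = ['ep1', 'ep2', 'ep3', 'ep4', 'ep5']

    def _entries(page):  # 0-based page index, called on demand
        start = page * PER_PAGE
        yield from EPISODES[start:start + PER_PAGE]

    print(list(OnDemandPagedList(_entries, PER_PAGE)))
    # ['ep1', 'ep2', 'ep3', 'ep4', 'ep5']

Keeping first_page around avoids refetching page 0 just to read the show's name and description.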
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 716190220..035747c31 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -44,7 +44,7 @@ class StoryFireBaseIE(InfoExtractor):
'timestamp': int_or_none(video.get('publishDate')),
'uploader': video.get('username'),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://storyfire.com/user/%s/video'),
+ 'uploader_url': format_field(uploader_id, None, 'https://storyfire.com/user/%s/video'),
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
}
diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py
index 85fc3a3c3..849a9882d 100644
--- a/yt_dlp/extractor/streamcz.py
+++ b/yt_dlp/extractor/streamcz.py
@@ -52,8 +52,8 @@ class StreamCZIE(InfoExtractor):
def _extract_formats(self, spl_url, video):
for ext, pref, streams in (
- ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))),
- ('mp4', 1, video.get('mp4'))):
+ ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}),
+ ('mp4', 1, video.get('mp4') or {})):
for format_id, stream in streams.items():
if not stream.get('url'):
continue
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 618dc4329..c879fb52e 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
float_or_none,
int_or_none,
smuggle_url,
diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py
new file mode 100644
index 000000000..70cf10515
--- /dev/null
+++ b/yt_dlp/extractor/substack.py
@@ -0,0 +1,100 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import str_or_none, traverse_obj
+
+
+class SubstackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<username>[\w-]+)\.substack\.com/p/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://haleynahman.substack.com/p/i-made-a-vlog?s=r',
+ 'md5': 'f27e4fc6252001d48d479f45e65cdfd5',
+ 'info_dict': {
+ 'id': '47660949',
+ 'ext': 'mp4',
+ 'title': 'I MADE A VLOG',
+ 'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
+ 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r',
+ 'md5': '0a63eacec877a1171a62cfa69710fcea',
+ 'info_dict': {
+ 'id': '51045592',
+ 'ext': 'mpga',
+ 'title': "🎧 Dear Danny: I found my boyfriend's secret Twitter account",
+ 'description': 'md5:a57f2439319e56e0af92dd0c95d75797',
+ 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe',
+ 'md5': 'fd3c07077b02444ff0130715b5f632bb',
+ 'info_dict': {
+ 'id': '47368578',
+ 'ext': 'mp4',
+ 'title': 'Mussels with Black Bean Sauce: Recipe of the Week #7',
+ 'description': 'md5:b96234a2906c7d854d5229818d889515',
+ 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232',
+ 'uploader': "Andrew Zimmern's Spilled Milk ",
+ 'uploader_id': '577659',
+ }
+ }]
+
+ @classmethod
+ def _extract_url(cls, webpage, url):
+ if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
+ return
+
+ mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
+ if mobj:
+ parsed = urllib.parse.urlparse(url)
+ return parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+
+ def _extract_video_formats(self, video_id, username):
+ formats, subtitles = [], {}
+ for video_format in ('hls', 'mp4'):
+ video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}'
+
+ if video_format == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': video_url,
+ 'ext': video_format,
+ })
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id, username = self._match_valid_url(url).group('id', 'username')
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
+
+ post_type = webpage_info['post']['type']
+ formats, subtitles = [], {}
+ if post_type == 'podcast':
+ formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
+ elif post_type == 'video':
+ formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username)
+ else:
+ self.raise_no_formats(f'Page type "{post_type}" is not supported')
+
+ self._sort_formats(formats)
+ return {
+ 'id': str(webpage_info['post']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(webpage_info, ('post', 'title')),
+ 'description': traverse_obj(webpage_info, ('post', 'description')),
+ 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
+ 'uploader': traverse_obj(webpage_info, ('pub', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
+ }
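Review note (substack.py): _extract_url lets the generic extractor hand off Substack posts served from custom domains: it keys on the substackcdn.com script, pulls the canonical subdomain out of the page JSON, and rebuilds the URL on <subdomain>.substack.com. Self-contained illustration (page snippet and domain are made up):

    import re
    import urllib.parse

    url = 'https://blog.example.com/p/i-made-a-vlog'
    webpage = '<script>window._preloads = {"subdomain": "haleynahman"}</script>'

    mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
    if mobj:
        parsed = urllib.parse.urlparse(url)
        print(parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl())
    # -> https://haleynahman.substack.com/p/i-made-a-vlog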
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index 80acaf190..3bd7ce3c4 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -1,16 +1,17 @@
-import json
+import urllib.parse
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
+ random_uuidv4,
unified_timestamp,
+ urlencode_postdata,
)
class TennisTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
'info_dict': {
'id': 'indian-wells-2018-verdasco-fritz',
@@ -25,86 +26,132 @@ class TennisTVIE(InfoExtractor):
'skip_download': True,
},
'skip': 'Requires email and password of a subscribed account',
- }
+ }, {
+ 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
+ 'info_dict': {
+ 'id': '2650480',
+ 'ext': 'mp4',
+ 'title': 'Best Matches of 2022 - Part 5',
+ 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Requires email and password of a subscribed account',
+ }]
_NETRC_MACHINE = 'tennistv'
- _session_token = None
-
- def _perform_login(self, username, password):
- login_form = {
- 'Email': username,
- 'Password': password,
- }
- login_json = json.dumps(login_form).encode('utf-8')
- headers = {
- 'content-type': 'application/json',
- 'Referer': 'https://www.tennistv.com/login',
- 'Origin': 'https://www.tennistv.com',
- }
-
- login_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/login', None,
- note='Logging in',
- errnote='Login failed (wrong password?)',
- headers=headers,
- data=login_json)
+ access_token, refresh_token = None, None
+ _PARTNER_ID = 3001482
+ _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'
+ _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'
+ _HEADERS = {
+ 'origin': 'https://www.tennistv.com',
+ 'referer': 'https://www.tennistv.com/',
+ 'content-Type': 'application/x-www-form-urlencoded'
+ }
- if login_result['error']['errorCode']:
- raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://tennistv.com',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid'
+ })
+
+ post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')
+ temp_page = self._download_webpage(
+ post_url, None, 'Sending login data', 'Unable to send login data',
+ headers=self._HEADERS, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ 'submitAction': 'Log In'
+ }))
+ if 'Your username or password was incorrect' in temp_page:
+ raise ExtractorError('Your username or password was incorrect', expected=True)
+
+ handle = self._request_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+ 'state': random_uuidv4(),
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ 'nonce': random_uuidv4(),
+ 'prompt': 'none'
+ })
+
+ self.get_token(None, {
+ 'code': urllib.parse.parse_qs(handle.geturl())['code'][-1],
+ 'grant_type': 'authorization_code',
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html'
+ })
+
+ def get_token(self, video_id, payload):
+ res = self._download_json(
+ f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
+ 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))
+
+ self.access_token = res.get('access_token') or self.access_token
+ self.refresh_token = res.get('refresh_token') or self.refresh_token
- if login_result['entitlement'] != 'SUBSCRIBED':
- self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+ def _real_initialize(self):
+ if self.access_token and self.refresh_token:
+ return
- self._session_token = login_result['sessionToken']
+ cookies = self._get_cookies('https://www.tennistv.com/')
+ if not cookies.get('access_token') or not cookies.get('refresh_token'):
+ self.raise_login_required()
+ self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value
- def _real_initialize(self):
- if not self._session_token:
- raise self.raise_login_required('Login info is needed for this website', method='password')
+ def _download_session_json(self, video_id, entryid):
+ return self._download_json(
+ f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}',
+ video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id')
+ entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')
+ session_json = self._download_session_json(video_id, entryid)
- headers = {
- 'Origin': 'https://www.tennistv.com',
- 'authorization': 'ATP %s' % self._session_token,
- 'content-type': 'application/json',
- 'Referer': url,
- }
- check_data = {
- 'videoID': internal_id,
- 'VideoUrlType': 'HLS',
- }
- check_json = json.dumps(check_data).encode('utf-8')
- check_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
- video_id, note='Checking video authorization', headers=headers, data=check_json)
- formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
- self._sort_formats(formats)
+ k_session = session_json.get('KSession')
+ if k_session is None:
+ self.get_token(video_id, {
+ 'grant_type': 'refresh_token',
+ 'refresh_token': self.refresh_token,
+ 'client_id': 'tennis-tv-web'
+ })
+ k_session = self._download_session_json(video_id, entryid).get('KSession')
+ if k_session is None:
+ raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)
- vdata = self._download_json(
- 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id,
- video_id, headers=headers)
+ if session_json.get('ErrorMessage'):
+ self.report_warning(session_json['ErrorMessage'])
- timestamp = unified_timestamp(vdata['timestamp'])
- thumbnail = vdata['video']['thumbnailUrl']
- description = vdata['displayText']['description']
- title = vdata['video']['title']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
- series = vdata['tour']
- venue = vdata['displayText']['venue']
- round_str = vdata['seo']['round']
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
- 'description': description,
+ 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'description': self._html_search_regex(
+ (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),
+ webpage, 'description', fatal=False),
+ 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',
+ 'timestamp': unified_timestamp(self._html_search_regex(
+ r'<span itemprop="uploadDate" content=["\']([^"\']+)["\']>', webpage, 'upload time')),
+ 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,
+ 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,
+ 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,
'formats': formats,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'series': series,
- 'season': venue,
- 'episode': round_str,
+ 'subtitles': subtitles,
}
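
Review note: the rewritten TennisTV extractor is a standard OAuth2 flow. It seeds the access/refresh tokens from the site cookies, asks the StreamAMG session endpoint for a KSession, and on failure retries once after a refresh_token grant. A minimal standalone sketch of that refresh step, assuming plain requests and a placeholder token endpoint (only the client id and the grant shape come from the diff; _AUTH_BASE_URL is defined elsewhere in the file and is not reproduced here):

import requests  # assumption: plain requests instead of yt-dlp's networking stack

TOKEN_URL = 'https://example.invalid/auth/token'  # hypothetical placeholder for _AUTH_BASE_URL + '/token'

def refresh_tokens(refresh_token):
    # OAuth2 refresh_token grant, mirroring get_token() above
    res = requests.post(TOKEN_URL, data={
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': 'tennis-tv-web',
    })
    res.raise_for_status()
    tokens = res.json()
    return tokens['access_token'], tokens.get('refresh_token', refresh_token)
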
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
index 32cae429e..d205fe053 100644
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -11,7 +11,7 @@ class TestURLIE(InfoExtractor):
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url):
- from ..extractor import gen_extractor_classes
+ from . import gen_extractor_classes
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 4ba993582..680358d5e 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,28 +1,27 @@
import itertools
+import json
import random
+import re
import string
import time
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse
-)
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
+ LazyList,
UnsupportedError,
+ get_element_by_id,
get_first,
int_or_none,
join_nonempty,
- LazyList,
+ qualities,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
- qualities,
)
@@ -35,6 +34,21 @@ class TikTokBaseIE(InfoExtractor):
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
+ _session_initialized = False
+
+ @staticmethod
+ def _create_url(user_id, video_id):
+ return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
+
+ def _get_sigi_state(self, webpage, display_id):
+ return self._parse_json(get_element_by_id(
+ 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+
+ def _real_initialize(self):
+ if self._session_initialized:
+ return
+ self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False)
+ TikTokBaseIE._session_initialized = True
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
@@ -261,6 +275,9 @@ class TikTokBaseIE(InfoExtractor):
return {
'id': aweme_id,
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': TikTokIE.IE_NAME,
+ 'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
'title': aweme_detail.get('desc'),
'description': aweme_detail.get('desc'),
'view_count': int_or_none(stats_info.get('play_count')),
@@ -361,7 +378,7 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@@ -459,14 +476,14 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available']
+ 'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
}, {
# Video without title and description
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
- 'title': 'tiktok video #7059698374567611694',
+ 'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@@ -483,13 +500,40 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available', 'Creating a generic title']
+ }, {
+ # hydration JSON is sent in a <script> element
+ 'url': 'https://www.tiktok.com/@denidil6/video/7065799023130643713',
+ 'info_dict': {
+ 'id': '7065799023130643713',
+ 'ext': 'mp4',
+ 'title': '#denidil#денидил',
+ 'description': '#denidil#денидил',
+ 'uploader': 'denidil6',
+ 'uploader_id': '7046664115636405250',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAsvMSzFdQ4ikl3uR2TEJwMBbB2yZh2Zxwhx-WCo3rbDpAharE3GQCrFuJArI3C8QJ',
+ 'artist': 'Holocron Music',
+ 'album': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'track': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'timestamp': 1645134536,
+ 'duration': 26,
+ 'upload_date': '20220217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['trying feed workaround', 'Unable to find video in feed']
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
'only_matching': True
}]
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return [mobj.group('url') for mobj in re.finditer(
+ rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
+
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@@ -497,7 +541,7 @@ class TikTokIE(TikTokBaseIE):
if not aweme_detail:
raise ExtractorError('Video not available', video_id=aweme_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with feed workaround')
+ self.report_warning(f'{e.orig_msg}; trying feed workaround')
feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id,
note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or []
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
@@ -506,26 +550,20 @@ class TikTokIE(TikTokBaseIE):
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
- video_id = self._match_id(url)
-
+ video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ self.report_warning(f'{e}; trying with webpage')
- # If we only call once, we get a 403 when downlaoding the video.
- self._download_webpage(url, video_id)
- webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
+ url = self._create_url(user_id, video_id)
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
-
if next_data:
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
else:
- sigi_json = self._search_regex(
- r'>\s*window\[[\'"]SIGI_STATE[\'"]\]\s*=\s*(?P<sigi_state>{.+});',
- webpage, 'sigi data', group='sigi_state')
- sigi_data = self._parse_json(sigi_json, video_id)
+ sigi_data = self._get_sigi_state(webpage, video_id)
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
@@ -841,7 +879,7 @@ class DouyinIE(TikTokIE):
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ self.report_warning(f'{e}; trying with webpage')
webpage = self._download_webpage(url, video_id)
render_data_json = self._search_regex(
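
The new `_get_sigi_state` helper accepts both hydration carriers TikTok uses: the classic `window['SIGI_STATE']` script and the newer `<script id="sigi-persisted-data">` element. Outside yt-dlp the same lookup can be approximated with one regex over the page (a sketch assuming the JSON is the script element's entire body, not the actual `get_element_by_id` helper):

import json
import re

def get_sigi_state(webpage):
    # Hydration JSON sits in a <script> tag whose id is SIGI_STATE or sigi-persisted-data
    mobj = re.search(
        r'<script[^>]+\bid=["\'](?:SIGI_STATE|sigi-persisted-data)["\'][^>]*>(.+?)</script>',
        webpage, re.DOTALL)
    return json.loads(mobj.group(1)) if mobj else None
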
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index c049025a3..d43411928 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -38,7 +38,7 @@ class TrovoBaseIE(InfoExtractor):
return {
'uploader': streamer_info.get('nickName'),
'uploader_id': str_or_none(streamer_info.get('uid')),
- 'uploader_url': format_field(username, template='https://trovo.live/%s'),
+ 'uploader_url': format_field(username, None, 'https://trovo.live/%s'),
}
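
The recurring `format_field(value, template=...)` to `format_field(value, None, template)` rewrites in this diff (trovo, twitter, vidio, vidlii, vine, younow) track a signature change in yt_dlp.utils.format_field, whose second parameter became a positional field selector. The contract being relied on is just "apply the printf template when the value is non-empty, else return a default"; a minimal re-implementation of that contract as a sketch, not the actual utility:

def format_field(obj, field=None, template='%s', ignore=(None, ''), default=''):
    # Look up obj[field] when a field is given, else use obj itself,
    # and apply the printf-style template unless the value is ignorable
    value = obj.get(field) if field is not None else obj
    return default if value in ignore else template % value

# e.g. format_field('someuser', None, 'https://trovo.live/%s') -> 'https://trovo.live/someuser'
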
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index b04575bd5..cebd027c8 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -54,9 +54,24 @@ class TVerIE(InfoExtractor):
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
webpage, 'url regex'))
+
+ episode_info = self._download_json(
+ f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
+ video_id, fatal=False,
+ query={
+ 'platform_uid': self._PLATFORM_UID,
+ 'platform_token': self._PLATFORM_TOKEN,
+ }, headers={
+ 'x-tver-platform-type': 'web'
+ })
+ episode_content = traverse_obj(
+ episode_info, ('result', 'episode', 'content')) or {}
+
video_info = self._download_json(
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
- query={'v': '5'}, headers={
+ query={
+ 'v': str_or_none(episode_content.get('version')) or '5',
+ }, headers={
'Origin': 'https://tver.jp',
'Referer': 'https://tver.jp/',
})
@@ -67,25 +82,13 @@ class TVerIE(InfoExtractor):
if not r_id.isdigit():
r_id = f'ref:{r_id}'
- additional_info = self._download_json(
- f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
- video_id, fatal=False,
- query={
- 'platform_uid': self._PLATFORM_UID,
- 'platform_token': self._PLATFORM_TOKEN,
- }, headers={
- 'x-tver-platform-type': 'web'
- })
-
- additional_content_info = traverse_obj(
- additional_info, ('result', 'episode', 'content'), get_all=False) or {}
- episode = strip_or_none(additional_content_info.get('title'))
- series = str_or_none(additional_content_info.get('seriesTitle'))
+ episode = strip_or_none(episode_content.get('title'))
+ series = str_or_none(episode_content.get('seriesTitle'))
title = (
join_nonempty(series, episode, delim=' ')
or str_or_none(video_info.get('title')))
- provider = str_or_none(additional_content_info.get('productionProviderName'))
- onair_label = str_or_none(additional_content_info.get('broadcastDateLabel'))
+ provider = str_or_none(episode_content.get('productionProviderName'))
+ onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
return {
'_type': 'url_transparent',
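
The reordering in tver.py is not cosmetic: the callEpisode response must now be fetched first because its content.version drives the `v` query parameter of the static episode JSON, with '5' kept as the fallback. In outline, assuming `session` is a requests.Session and the credentials are already obtained (hypothetical standalone form):

def fetch_video_info(session, video_id, platform_uid, platform_token):
    # Episode metadata first: it carries the content version used below
    episode = session.get(
        f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}',
        params={'platform_uid': platform_uid, 'platform_token': platform_token},
        headers={'x-tver-platform-type': 'web'}).json()
    version = str(episode['result']['episode']['content'].get('version') or 5)
    # Static JSON second, pinned to that version (falls back to '5')
    return session.get(
        f'https://statics.tver.jp/content/episode/{video_id}.json',
        params={'v': version},
        headers={'Origin': 'https://tver.jp', 'Referer': 'https://tver.jp/'}).json()
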
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index af6750333..d516aafa2 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -470,7 +470,7 @@ class TwitterIE(TwitterBaseIE):
'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://twitter.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')),
'repost_count': int_or_none(status.get('retweet_count')),
'comment_count': int_or_none(status.get('reply_count')),
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
index d35cd0d43..1dc2dbdc4 100644
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -1,16 +1,12 @@
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
float_or_none,
int_or_none,
js_to_json,
@@ -148,14 +144,14 @@ class UdemyIE(InfoExtractor):
'X-Udemy-Snail-Case': 'true',
'X-Requested-With': 'XMLHttpRequest',
}
- for cookie in self._downloader.cookiejar:
+ for cookie in self.cookiejar:
if cookie.name == 'client_id':
headers['X-Udemy-Client-Id'] = cookie.value
elif cookie.name == 'access_token':
headers['X-Udemy-Bearer-Token'] = cookie.value
headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
- if isinstance(url_or_request, compat_urllib_request.Request):
+ if isinstance(url_or_request, urllib.request.Request):
for header, value in headers.items():
url_or_request.add_header(header, value)
else:
diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py
index 296799d38..3f687f737 100644
--- a/yt_dlp/extractor/urort.py
+++ b/yt_dlp/extractor/urort.py
@@ -1,10 +1,7 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
-from ..utils import (
- unified_strdate,
-)
+from ..utils import unified_strdate
class UrortIE(InfoExtractor):
@@ -31,7 +28,7 @@ class UrortIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+ fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id)
json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
songs = self._download_json(json_url, playlist_id)
entries = []
diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py
index bc0187511..825089f47 100644
--- a/yt_dlp/extractor/vevo.py
+++ b/yt_dlp/extractor/vevo.py
@@ -33,10 +33,124 @@ class VevoIE(VevoBaseIE):
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
https?://embed\.vevo\.com/.*?[?&]isrc=|
+ https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
vevo:)
(?P<id>[^&?#]+)'''
- _TESTS = []
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
+ 'info_dict': {
+ 'id': 'GB1101300280',
+ 'ext': 'mp4',
+ 'title': 'Hurts - Somebody to Die For',
+ 'timestamp': 1372057200,
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'track': 'Somebody to Die For',
+ 'artist': 'Hurts',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'v3 SMIL format',
+ 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
+ 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
+ 'info_dict': {
+ 'id': 'USUV71302923',
+ 'ext': 'mp4',
+ 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+ 'timestamp': 1392796919,
+ 'upload_date': '20140219',
+ 'uploader': 'Cassadee Pope',
+ 'track': 'I Wish I Could Break Your Heart',
+ 'artist': 'Cassadee Pope',
+ 'genre': 'Country',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Age-limited video',
+ 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
+ 'info_dict': {
+ 'id': 'USRV81300282',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'age_limit': 18,
+ 'timestamp': 1372888800,
+ 'upload_date': '20130703',
+ 'uploader': 'Justin Timberlake',
+ 'track': 'Tunnel Vision (Explicit)',
+ 'artist': 'Justin Timberlake',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'K Camp ft. T.I. - Till I Die',
+ 'age_limit': 18,
+ 'timestamp': 1449468000,
+ 'upload_date': '20151207',
+ 'uploader': 'K Camp',
+ 'track': 'Till I Die',
+ 'artist': 'K Camp',
+ 'genre': 'Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Only available via webpage',
+ 'url': 'http://www.vevo.com/watch/GBUV71600656',
+ 'md5': '67e79210613865b66a47c33baa5e37fe',
+ 'info_dict': {
+ 'id': 'GBUV71600656',
+ 'ext': 'mp4',
+ 'title': 'ABC - Viva Love',
+ 'age_limit': 0,
+ 'timestamp': 1461830400,
+ 'upload_date': '20160428',
+ 'uploader': 'ABC',
+ 'track': 'Viva Love',
+ 'artist': 'ABC',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Failed to download video versions info'],
+ }, {
+ # no genres available
+ 'url': 'http://www.vevo.com/watch/INS171400764',
+ 'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
+ 'only_matching': True,
+ }]
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
1: 'level3',
@@ -138,6 +252,7 @@ class VevoIE(VevoBaseIE):
fatal=False))
else:
m = re.search(r'''(?xi)
+ _(?P<quality>[a-z0-9]+)
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
_(?P<vcodec>[a-z0-9]+)
_(?P<vbr>[0-9]+)
@@ -149,7 +264,7 @@ class VevoIE(VevoBaseIE):
formats.append({
'url': version_url,
- 'format_id': 'http-%s-%s' % (version, video_version['quality']),
+ 'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
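
The extended regex exists because some Vevo HTTP version URLs no longer carry a `quality` key in their metadata but do encode it in the filename (`..._<quality>_<width>x<height>_<vcodec>_<vbr>_...`); the format id then prefers the metadata value and falls back to the URL-derived group. A toy demonstration of the same fallback (the filename pattern is assumed from the regex above, truncated before the acodec part the hunk does not show):

import re

VERSION_RE = re.compile(r'''(?xi)
    _(?P<quality>[a-z0-9]+)
    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
    _(?P<vcodec>[a-z0-9]+)
    _(?P<vbr>[0-9]+)
''')

def format_id(version, video_version, version_url):
    m = VERSION_RE.search(version_url)
    # Prefer the API-supplied quality, else the one embedded in the URL
    quality = video_version.get('quality') or (m and m.group('quality'))
    return f'http-{version}-{quality}'
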
diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py
index 251eb78fe..9b05c86a5 100644
--- a/yt_dlp/extractor/videa.py
+++ b/yt_dlp/extractor/videa.py
@@ -1,8 +1,10 @@
import random
import re
import string
+import struct
from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_ord
from ..utils import (
ExtractorError,
int_or_none,
@@ -14,11 +16,6 @@ from ..utils import (
xpath_element,
xpath_text,
)
-from ..compat import (
- compat_b64decode,
- compat_ord,
- compat_struct_pack,
-)
class VideaIE(InfoExtractor):
@@ -102,7 +99,7 @@ class VideaIE(InfoExtractor):
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
k = S[(S[i] + S[j]) % 256]
- res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
+ res += struct.pack('B', k ^ compat_ord(cipher_text[m]))
return res.decode()
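
The loop above is the output (PRGA) phase of RC4: after the key-scheduling pass builds the permutation S, each keystream byte k is XORed with a ciphertext byte, and `struct.pack('B', ...)` now replaces the removed compat_struct_pack. For reference, a complete self-contained RC4 in the same style (the standard algorithm, not anything Videa-specific; the original uses compat_ord because its input may be str, this sketch assumes bytes):

import struct

def rc4(cipher_text: bytes, key: bytes) -> bytes:
    # Key-scheduling algorithm (KSA)
    S = list(range(256))
    j = 0
    for i in range(256):
        j = (j + S[i] + key[i % len(key)]) % 256
        S[i], S[j] = S[j], S[i]
    # Pseudo-random generation algorithm (PRGA), as in the loop above
    res = b''
    i = j = 0
    for m in range(len(cipher_text)):
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        k = S[(S[i] + S[j]) % 256]
        res += struct.pack('B', k ^ cipher_text[m])
    return res
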
diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py
index 906412f08..679574bd7 100644
--- a/yt_dlp/extractor/videocampus_sachsen.py
+++ b/yt_dlp/extractor/videocampus_sachsen.py
@@ -6,14 +6,18 @@ from ..utils import ExtractorError
class VideocampusSachsenIE(InfoExtractor):
- IE_NAME = 'Vimp'
+ IE_NAME = 'ViMP'
_INSTANCES = (
+ 'bergauf.tv',
'campus.demo.vimp.com',
'corporate.demo.vimp.com',
'dancehalldatabase.com',
+ 'drehzahl.tv',
'educhannel.hs-gesundheit.de',
'emedia.ls.haw-hamburg.de',
'globale-evolution.net',
+ 'hohu.tv',
+ 'htvideos.hightechhigh.org',
'k210039.vimp.mivitec.net',
'media.cmslegal.com',
'media.hs-furtwangen.de',
@@ -25,6 +29,7 @@ class VideocampusSachsenIE(InfoExtractor):
'mportal.europa-uni.de',
'pacific.demo.vimp.com',
'slctv.com',
+ 'streaming.prairiesouth.ca',
'tube.isbonline.cn',
'univideo.uni-kassel.de',
'ursula2.genetics.emory.edu',
@@ -52,11 +57,15 @@ class VideocampusSachsenIE(InfoExtractor):
'vimp.weka-fachmedien.de',
'webtv.univ-montp3.fr',
'www.b-tu.de/media',
+ 'www.bergauf.tv',
'www.bigcitytv.de',
'www.cad-videos.de',
+ 'www.drehzahl.tv',
'www.fh-bielefeld.de/medienportal',
+ 'www.hohu.tv',
'www.orvovideo.com',
'www.rwe.tv',
+ 'www.salzi.tv',
'www.wenglor-media.com',
'www2.univ-sba.dz',
)
@@ -73,6 +82,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
'ext': 'mp4',
},
},
@@ -82,6 +92,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4',
},
@@ -92,6 +103,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4',
},
@@ -103,6 +115,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '0183356e41af7bfb83d7667b20d9b6a3',
'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+ 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
'ext': 'mp4',
}
@@ -113,6 +126,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'c8816f1cc942c12b6cce57c835cffd7c',
'title': 'Preisverleihung »Produkte des Jahres 2022«',
'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+ 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
'ext': 'mp4',
},
@@ -124,7 +138,7 @@ class VideocampusSachsenIE(InfoExtractor):
'title': 'Was ist selbstgesteuertes Lernen?',
'ext': 'mp4',
},
- }
+ },
]
def _real_extract(self, url):
@@ -139,12 +153,14 @@ class VideocampusSachsenIE(InfoExtractor):
if not (display_id or tmp_id):
# Title, description from embedded page's meta wouldn't be correct
- title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False)
+ title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
description = None
+ thumbnail = None
else:
title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
description = self._html_search_meta(
- ('og:description', 'twitter:description', 'description'), webpage, default=None)
+ ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+ thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
formats, subtitles = [], {}
try:
@@ -162,7 +178,8 @@ class VideocampusSachsenIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
+ 'thumbnail': thumbnail,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles
+ 'subtitles': subtitles,
}
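
The new thumbnail field is picked up the same way as title and description: `_html_search_meta` takes a tuple of names and returns the first matching <meta> tag. A bare-bones equivalent of that first-match-wins lookup (sketch, not the actual helper):

import re

def search_meta(names, webpage):
    # Return the content of the first matching <meta property=...> / <meta name=...>
    for name in names:
        m = re.search(
            rf'<meta[^>]+(?:property|name)=["\']{re.escape(name)}["\'][^>]+content=["\']([^"\']+)',
            webpage)
        if m:
            return m.group(1)

thumbnail = search_meta(('og:image', 'twitter:image'),
                        '<meta property="og:image" content="x.jpg">')  # -> 'x.jpg'
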
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
index 599996bf9..8092d340e 100644
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@@ -152,7 +152,7 @@ class VidioIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
'channel': channel.get('name'),
'channel_id': str_or_none(channel.get('id')),
'view_count': get_count('view_count'),
@@ -283,5 +283,5 @@ class VidioLiveIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(stream_meta.get('start_time')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
}
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py
index b9845affd..69a75304e 100644
--- a/yt_dlp/extractor/vidlii.py
+++ b/yt_dlp/extractor/vidlii.py
@@ -100,7 +100,7 @@ class VidLiiIE(InfoExtractor):
uploader = self._search_regex(
r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
webpage, 'uploader', fatal=False)
- uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s')
+ uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s')
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, default=None) or self._search_regex(
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 59c5353ab..961734345 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -40,6 +40,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
+ def _unsmuggle_headers(self, url):
+ """@returns (url, smuggled_data, headers)"""
+ url, data = unsmuggle_url(url, {})
+ headers = self.get_param('http_headers').copy()
+ if 'http_headers' in data:
+ headers.update(data['http_headers'])
+ return url, data, headers
+
def _perform_login(self, username, password):
webpage = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -718,10 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
]
@staticmethod
- def _smuggle_referrer(url, referrer_url):
- return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
-
- @staticmethod
def _extract_urls(url, webpage):
urls = []
# Look for embedded (iframe) Vimeo player
@@ -754,8 +762,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded',
})
checked = self._download_json(
- url + '/check-password', video_id,
- 'Verifying the password', data=data, headers=headers)
+ f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password',
+ video_id, 'Verifying the password', data=data, headers=headers)
if checked is False:
raise ExtractorError('Wrong video password', expected=True)
return checked
@@ -830,10 +838,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise
def _real_extract(self, url):
- url, data = unsmuggle_url(url, {})
- headers = self.get_param('http_headers').copy()
- if 'http_headers' in data:
- headers.update(data['http_headers'])
+ url, data, headers = self._unsmuggle_headers(url)
if 'Referer' not in headers:
headers['Referer'] = url
@@ -1383,14 +1388,15 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
@staticmethod
- def _extract_url(webpage):
+ def _extract_url(url, webpage):
mobj = re.search(
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
- return unescapeHTML(mobj.group(1)) if mobj else None
+ return VimeoIE._smuggle_referrer(unescapeHTML(mobj.group(1)), url) if mobj else None
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ url, _, headers = self._unsmuggle_headers(url)
+ webpage = self._download_webpage(url, video_id, headers=headers)
config_url = self._parse_json(self._search_regex(
r'window\.OTTData\s*=\s*({.+})', webpage,
'ott data'), video_id, js_to_json)['config_url']
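
Moving `_smuggle_referrer` up to the base class lets VHX embeds carry the embedding page's Referer through to the actual download: `_extract_url` appends the header data to the URL, and `_unsmuggle_headers` merges it back into the request headers before fetching the page. Conceptually the smuggle/unsmuggle pair is just this (a sketch of the mechanism, not yt-dlp's exact fragment key or encoding):

import json
import urllib.parse

def smuggle_url(url, data):
    # Piggyback extra data on the URL fragment
    return url + '#__smuggle=' + urllib.parse.quote(json.dumps(data))

def unsmuggle_url(url, default=None):
    url, _, frag = url.partition('#__smuggle=')
    return url, (json.loads(urllib.parse.unquote(frag)) if frag else default)

embed = smuggle_url('https://embed.vhx.tv/videos/123',
                    {'http_headers': {'Referer': 'https://example.com/'}})
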
diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py
index bbf43a83f..947f5cdb6 100644
--- a/yt_dlp/extractor/vine.py
+++ b/yt_dlp/extractor/vine.py
@@ -89,7 +89,7 @@ class VineIE(InfoExtractor):
username = data.get('username')
- alt_title = format_field(username, template='Vine by %s')
+ alt_title = format_field(username, None, 'Vine by %s')
return {
'id': video_id,
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py
index e4570a03a..feab79138 100644
--- a/yt_dlp/extractor/voicy.py
+++ b/yt_dlp/extractor/voicy.py
@@ -1,3 +1,5 @@
+import itertools
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -9,8 +11,6 @@ from ..utils import (
unsmuggle_url,
)
-import itertools
-
class VoicyBaseIE(InfoExtractor):
def _extract_from_playlist_data(self, value):
@@ -105,7 +105,7 @@ class VoicyChannelIE(VoicyBaseIE):
@classmethod
def suitable(cls, url):
- return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+ return not VoicyIE.suitable(url) and super().suitable(url)
def _entries(self, channel_id):
pager = ''
diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py
index 35662753e..0b9bf2903 100644
--- a/yt_dlp/extractor/vrv.py
+++ b/yt_dlp/extractor/vrv.py
@@ -1,17 +1,14 @@
import base64
-import json
import hashlib
import hmac
+import json
import random
import string
import time
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse_urlencode,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError, compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
float_or_none,
@@ -46,12 +43,12 @@ class VRVBaseIE(InfoExtractor):
headers['Content-Type'] = 'application/json'
base_string = '&'.join([
'POST' if data else 'GET',
- compat_urllib_parse.quote(base_url, ''),
- compat_urllib_parse.quote(encoded_query, '')])
+ urllib.parse.quote(base_url, ''),
+ urllib.parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
- encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+ encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
try:
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
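
The replaced calls implement OAuth 1.0-style request signing: the signature base string is METHOD&quote(url)&quote(query), HMAC-SHA1'd with consumer_secret&token_secret and base64-encoded into oauth_signature. Condensed into a standalone function (same construction as the diff, with the secrets passed in as plain strings):

import base64
import hashlib
import hmac
import urllib.parse

def oauth_signature(method, base_url, encoded_query, consumer_secret, token_secret=''):
    base_string = '&'.join([
        method,
        urllib.parse.quote(base_url, ''),
        urllib.parse.quote(encoded_query, '')])
    key = (consumer_secret + '&' + token_secret).encode('ascii')
    return base64.b64encode(
        hmac.new(key, base_string.encode(), hashlib.sha1).digest()).decode()
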
diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py
index 8ef75d30e..fd5226bbc 100644
--- a/yt_dlp/extractor/vshare.py
+++ b/yt_dlp/extractor/vshare.py
@@ -1,11 +1,7 @@
import re
from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
- decode_packed_codes,
- ExtractorError,
-)
+from ..utils import ExtractorError, decode_packed_codes
class VShareIE(InfoExtractor):
@@ -37,7 +33,7 @@ class VShareIE(InfoExtractor):
digits = [int(digit) for digit in digits.split(',')]
key_digit = self._search_regex(
r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
- chars = [compat_chr(d - int(key_digit)) for d in digits]
+ chars = [chr(d - int(key_digit)) for d in digits]
return ''.join(chars)
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py
index 6349e5326..e1062b9b5 100644
--- a/yt_dlp/extractor/wppilot.py
+++ b/yt_dlp/extractor/wppilot.py
@@ -20,7 +20,7 @@ class WPPilotBaseIE(InfoExtractor):
def _get_channel_list(self, cache=True):
if cache is True:
- cache_res = self._downloader.cache.load('wppilot', 'channel-list')
+ cache_res = self.cache.load('wppilot', 'channel-list')
if cache_res:
return cache_res, True
webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
@@ -35,7 +35,7 @@ class WPPilotBaseIE(InfoExtractor):
channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes'])
if channel_list is None:
continue
- self._downloader.cache.store('wppilot', 'channel-list', channel_list)
+ self.cache.store('wppilot', 'channel-list', channel_list)
return channel_list, False
raise ExtractorError('Unable to find the channel list')
@@ -101,7 +101,7 @@ class WPPilotIE(WPPilotBaseIE):
channel = self._get_channel(video_id)
video_id = str(channel['id'])
- is_authorized = next((c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None)
+ is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None)
# cookies starting with "g:" are assigned to guests
is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False
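
The `self._downloader.cache` to `self.cache` (and `self._downloader.cookiejar` to `self.cookiejar`) rewrites here and in youtube.py suggest these objects are now exposed directly on InfoExtractor; plausibly just delegating properties along these lines (an assumption, not confirmed by this diff):

class InfoExtractor:
    @property
    def cache(self):
        # Hypothetical shape of the new accessor: delegate to the downloader's cache
        return self._downloader.cache

    @property
    def cookiejar(self):
        return self._downloader.cookiejar
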
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
index 28b6ecb6e..63abe4a1f 100644
--- a/yt_dlp/extractor/xfileshare.py
+++ b/yt_dlp/extractor/xfileshare.py
@@ -1,11 +1,10 @@
import re
from .common import InfoExtractor
-from ..compat import compat_chr
from ..utils import (
+ ExtractorError,
decode_packed_codes,
determine_ext,
- ExtractorError,
int_or_none,
js_to_json,
urlencode_postdata,
@@ -32,11 +31,11 @@ def aa_decode(aa_code):
aa_char = aa_char.replace('+ ', '')
m = re.match(r'^\d+', aa_char)
if m:
- ret += compat_chr(int(m.group(0), 8))
+ ret += chr(int(m.group(0), 8))
else:
m = re.match(r'^u([\da-f]+)', aa_char)
if m:
- ret += compat_chr(int(m.group(1), 16))
+ ret += chr(int(m.group(1), 16))
return ret
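
aa_decode handles AAEncode output, where each character is emitted either as an octal literal or as 'u' plus hex digits; the migration only swaps compat_chr for the builtin chr. The two branches in isolation:

import re

def decode_char(aa_char):
    # Octal form, e.g. '152' -> 'j'; hex form, e.g. 'u006a' -> 'j'
    m = re.match(r'^\d+', aa_char)
    if m:
        return chr(int(m.group(0), 8))
    m = re.match(r'^u([\da-f]+)', aa_char)
    if m:
        return chr(int(m.group(1), 16))
    return ''
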
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index ff15d3707..e42eed7d8 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -21,7 +21,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
- _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?%s/
@@ -32,7 +32,7 @@ class XHamsterIE(InfoExtractor):
''' % _DOMAINS
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'md5': '98b4687efb1ffd331c4197854dc09e8f',
+ 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -41,6 +41,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1350194821,
'upload_date': '20121014',
'uploader': 'Ruseful2011',
+ 'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
},
@@ -70,6 +71,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1454948101,
'upload_date': '20160208',
'uploader': 'parejafree',
+ 'uploader_id': 'parejafree',
'duration': 72,
'age_limit': 18,
},
@@ -115,6 +117,9 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
'only_matching': True,
+ }, {
+ 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -244,7 +249,6 @@ class XHamsterIE(InfoExtractor):
categories = None
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
-
return {
'id': video_id,
'display_id': display_id,
@@ -263,7 +267,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
- 'age_limit': age_limit,
+ 'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
}
@@ -423,6 +427,9 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://xhday.com/users/mobhunter',
+ 'only_matching': True,
}]
def _entries(self, user_id):
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
index 3fe6192bf..8811df6d8 100644
--- a/yt_dlp/extractor/yahoo.py
+++ b/yt_dlp/extractor/yahoo.py
@@ -1,15 +1,15 @@
import hashlib
import itertools
import re
+import urllib.parse
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse,
-)
+from .youtube import YoutubeIE
+from ..compat import compat_str
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
mimetype2ext,
parse_iso8601,
@@ -18,9 +18,6 @@ from ..utils import (
url_or_none,
)
-from .brightcove import BrightcoveNewIE
-from .youtube import YoutubeIE
-
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -333,7 +330,7 @@ class YahooSearchIE(SearchInfoExtractor):
def _search_results(self, query):
for pagenum in itertools.count(0):
- result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
yield from (self.url_result(result['rurl']) for result in info['results'])
@@ -434,7 +431,7 @@ class YahooGyaOIE(InfoExtractor):
page = 1
while True:
playlist = self._download_json(
- f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}', program_id,
+ f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
note=f'Downloading JSON metadata page {page}')
if not playlist:
break
diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py
index 444785947..27eda9721 100644
--- a/yt_dlp/extractor/ynet.py
+++ b/yt_dlp/extractor/ynet.py
@@ -1,8 +1,8 @@
-import re
import json
+import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
class YnetIE(InfoExtractor):
@@ -31,7 +31,7 @@ class YnetIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
+ content = urllib.parse.unquote_plus(self._og_search_video_url(webpage))
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
f4m_url = config['clip']['url']
title = self._og_search_title(webpage)
diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py
index 76d89f3ce..18112ba35 100644
--- a/yt_dlp/extractor/younow.py
+++ b/yt_dlp/extractor/younow.py
@@ -91,7 +91,7 @@ def _extract_moment(item, fatal=True):
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
- uploader_url = format_field(uploader, template='https://www.younow.com/%s')
+ uploader_url = format_field(uploader, None, 'https://www.younow.com/%s')
entry = {
'extractor_key': 'YouNowMoment',
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 5aea82295..b484e08ec 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -135,9 +135,10 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
- [r'UPLOADED:\s*<span>([^<]+)',
+ (r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
- r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+ r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+ r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 5546aa9a3..ebc3381a2 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,7 +2,6 @@ import base64
import calendar
import copy
import datetime
-import functools
import hashlib
import itertools
import json
@@ -14,18 +13,11 @@ import sys
import threading
import time
import traceback
+import urllib.error
+import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
@@ -382,11 +374,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = {}
if pref_cookie:
try:
- pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
+ pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'})
- self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
+ self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
@@ -397,9 +389,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
- _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
return copy.deepcopy(INNERTUBE_CLIENTS[client])
@@ -415,15 +406,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
+
+ def _select_api_hostname(self, req_api_hostname, default_client=None):
+ return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
+ or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
@@ -470,18 +465,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
+ api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
+ or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
- f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
+ f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
+ query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
- data = self._search_regex(
- (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
- if data:
- return self._parse_json(data, item_id, fatal=fatal)
+ return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
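
Dropping _YT_INITIAL_BOUNDARY_RE works because `_search_json` (the newer InfoExtractor helper used above) locates the object start after the marker regex and parses one balanced JSON object, so no trailing-boundary regex is needed. The idea, stripped down to a hypothetical standalone version built on the stdlib raw decoder:

import json
import re

def search_json(start_pattern, text):
    # Find the marker, then let the decoder consume one balanced JSON object
    m = re.search(start_pattern + r'\s*(?={)', text)
    if not m:
        return None
    obj, _ = json.JSONDecoder().raw_decode(text[m.end():])
    return obj

data = search_json(r'ytInitialData\s*=',
                   'var ytInitialData = {"a": 1};</script>')  # -> {'a': 1}
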
@@ -497,7 +490,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
- token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
@@ -513,12 +506,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
- delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
- lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
+ lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
@@ -534,7 +527,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
- @property
+ @functools.cached_property
def is_authenticated(self):
return bool(self._generate_sapisidhash_header())
@@ -550,9 +543,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
- origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
+ origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
- 'X-YouTube-Client-Name': compat_str(
+ 'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
@@ -612,7 +605,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return
ctp = continuation_ep.get('clickTrackingParams')
@@ -672,7 +665,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_badges(self, renderer: dict):
badges = set()
for badge in try_get(renderer, lambda x: x['badges'], list) or []:
- label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
+ label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
if label:
badges.add(label.lower())
return badges
@@ -687,7 +680,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
obj = [obj]
for item in obj:
- text = try_get(item, lambda x: x['simpleText'], compat_str)
+ text = try_get(item, lambda x: x['simpleText'], str)
if text:
return text
runs = try_get(item, lambda x: x['runs'], list) or []
@@ -789,20 +782,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if isinstance(e.cause, compat_HTTPError):
+ if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
- lambda x: x['error']['message'], compat_str)
+ lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
@@ -2212,28 +2205,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}, {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
- 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
+ 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
'info_dict': {
- 'id': 'yN3x1t3sieA',
+ 'id': 'vv8qTUWmulI',
'ext': 'mp4',
- 'uploader': 'Linus Tech Tips',
- 'duration': 13,
- 'channel': 'Linus Tech Tips',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'upload_date': '20220526',
+ 'categories': ['Education'],
+ 'title': 'Story',
+ 'channel': 'IT\'S HISTORY',
+ 'description': '',
+ 'uploader_id': 'BlastfromthePast',
+ 'duration': 12,
+ 'uploader': 'IT\'S HISTORY',
'playable_in_embed': True,
- 'tags': [],
'age_limit': 0,
- 'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
- 'upload_date': '20220402',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
- 'title': 'Story',
'live_status': 'not_live',
- 'uploader_id': 'LinusTechTips',
+ 'tags': [],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
+ 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'nao20010128nao',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader': 'nao20010128nao',
+ 'uploader_id': 'nao20010128nao',
+ 'categories': ['Music'],
'view_count': int,
'description': '',
- 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'categories': ['Science & Technology'],
- 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'availability': 'unlisted',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
}
}
]
@@ -2319,7 +2338,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
@@ -2386,6 +2405,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
yield {
'url': last_segment_url,
+ 'fragment_count': last_seq,
}
if known_idx == last_seq:
no_fragment_score += 5
@@ -2400,7 +2420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_player_url(self, *ytcfgs, webpage=None):
player_url = traverse_obj(
ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not player_url:
return
return urljoin('https://www.youtube.com', player_url)
@@ -2417,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+ return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
@@ -2447,7 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
assert os.path.basename(func_id) == func_id
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ cache_spec = self.cache.load('youtube-sigfuncs', func_id)
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
@@ -2455,11 +2475,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if code:
res = self._parse_sig_js(code)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ test_string = ''.join(map(chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ self.cache.store('youtube-sigfuncs', func_id, cache_spec)
return res
def _print_sig_code(self, func, example_sig):
@@ -2494,12 +2514,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
yield _genslice(start, i, step)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ test_string = ''.join(map(chr, range(len(example_sig))))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = ' + '.join(gen_sig_code(cache_spec))
signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ ', '.join(str(len(p)) for p in example_sig.split('.')))
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
@@ -2530,22 +2550,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
+ func = self._extract_signature_function(video_id, player_url, s)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
self._print_sig_code(func, s)
return func(s)
except Exception as e:
- raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+ raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
@@ -2580,7 +2594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self._downloader.cache.load('youtube-nsig', player_id)
+ func_code = self.cache.load('youtube-nsig', player_id)
if func_code:
jsi = JSInterpreter(func_code)
@@ -2589,7 +2603,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
funcname = self._extract_n_function_name(jscode)
jsi = JSInterpreter(jscode)
func_code = jsi.extract_function_code(funcname)
- self._downloader.cache.store('youtube-nsig', player_id, func_code)
+ self.cache.store('youtube-nsig', player_id, func_code)
if self.get_param('youtube_print_sig_code'):
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
@@ -2621,30 +2635,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts
def _mark_watched(self, video_id, player_responses):
- playback_url = get_first(
- player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
- expected_type=url_or_none)
- if not playback_url:
- self.report_warning('Unable to mark watched')
- return
- parsed_playback_url = compat_urlparse.urlparse(playback_url)
- qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+ for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
+ label = 'fully ' if is_full else ''
+ url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
+ expected_type=url_or_none)
+ if not url:
+ self.report_warning(f'Unable to mark {label}watched')
+ return
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
+
+ # cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+
+ # more consistent results setting it to right before the end
+ video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ 'cmt': video_length,
+ 'el': 'detailpage', # otherwise defaults to "shorts"
+ })
- # cpn generation algorithm is reverse engineered from base.js.
- # In fact it works even with dummy cpn.
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+ if is_full:
+ # these seem to mark watchtime "history" in the real world
+ # they're required, so send in a single value
+ qs.update({
+ 'st': video_length,
+ 'et': video_length,
+ })
- qs.update({
- 'ver': ['2'],
- 'cpn': [cpn],
- })
- playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ url = urllib.parse.urlunparse(
+ parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
- self._download_webpage(
- playback_url, video_id, 'Marking watched',
- 'Unable to mark watched', fatal=False)
+ self._download_webpage(
+ url, video_id, f'Marking {label}watched',
+ 'Unable to mark watched', fatal=False)
@staticmethod
def _extract_urls(webpage):
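As the comment in the new _mark_watched notes, the client playback nonce (cpn) only needs the right shape; any 16 characters from the base64url-style alphabet are accepted. A standalone sketch of the generator:

    import random

    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    def generate_cpn():
        # & 63 maps the random value into the 64-symbol alphabet
        return ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))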
@@ -2713,39 +2742,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
chapter_title = lambda chapter: self._get_text(chapter, 'title')
- return next((
- filter(None, (
- self._extract_chapters(
- traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
- chapter_time, chapter_title, duration)
- for contents in content_list
- ))), [])
-
- def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
- chapters = []
- last_chapter = {'start_time': 0}
- for idx, chapter in enumerate(chapter_list or []):
- title = chapter_title(chapter)
- start_time = chapter_time(chapter)
- if start_time is None:
- continue
- last_chapter['end_time'] = start_time
- if start_time < last_chapter['start_time']:
- if idx == 1:
- chapters.pop()
- self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
- else:
- self.report_warning(f'Invalid start time for chapter "{title}"')
- continue
- last_chapter = {'start_time': start_time, 'title': title}
- chapters.append(last_chapter)
- last_chapter['end_time'] = duration
- return chapters
+ return next(filter(None, (
+ self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list)), [])
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
+ def _extract_chapters_from_description(self, description, duration):
+ return self._extract_chapters(
+ re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
+ chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+ duration=duration, strict=False)
+
+ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': chapter_time(chapter),
+ 'title': chapter_title(chapter),
+ } for chapter in chapter_list or []]
+ if not strict:
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None or not chapter['title']:
+ self.report_warning(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters[-1]['end_time'] = chapter['start_time']
+ chapters.append(chapter)
+ else:
+ self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
+ chapters[-1]['end_time'] = duration
+ return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
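The new description fallback recognises lines that start with a timestamp followed by a title. A quick illustration of the pattern it feeds into _extract_chapters (the sample description is made up):

    import re

    description = '0:00 Intro\n1:23 First topic\n1:02:05 Outro'
    print(re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description))
    # [('0:00', 'Intro'), ('1:23', 'First topic'), ('1:02:05', 'Outro')]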
@@ -2758,12 +2786,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer,
- lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
+ lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
- lambda x: x['likeCount']), compat_str)) or 0
+ lambda x: x['likeCount']), str)) or 0
author_thumbnail = try_get(comment_renderer,
- lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
+ lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
is_favorited = 'creatorHeart' in (try_get(
@@ -3028,9 +3056,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
initial_pr = None
if webpage:
- initial_pr = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
- video_id, 'initial player response')
+ initial_pr = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
all_clients = set(clients)
clients = clients[::-1]
@@ -3144,16 +3171,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = fmt.get('url')
if not fmt_url:
- sc = compat_parse_qs(fmt.get('signatureCipher'))
+ sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not (sc and fmt_url and encrypted_sig):
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
continue
- if not player_url:
+ try:
+ fmt_url += '&%s=%s' % (
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ self._decrypt_signature(encrypted_sig, video_id, player_url)
+ )
+ except ExtractorError as e:
+ self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
+ self.write_debug(e, only_once=True)
continue
- signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
- sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
- fmt_url += '&' + sp + '=' + signature
query = parse_qs(fmt_url)
throttled = False
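A signatureCipher bundles the stream URL, the scrambled signature (s) and the name of the query parameter it must be sent back as (sp) into one query string; the hunk above now appends the descrambled value in a single expression. A rough sketch of that decode step, where decrypt stands in for the extracted player function:

    from urllib.parse import parse_qs

    def resolve_cipher(signature_cipher, decrypt):
        sc = parse_qs(signature_cipher)
        url, sig = sc['url'][0], sc['s'][0]
        param = sc.get('sp', ['signature'])[-1]  # 'signature' is the historical default
        return f'{url}&{param}={decrypt(sig)}'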
@@ -3164,7 +3195,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except ExtractorError as e:
self.report_warning(
'nsig extraction failed: You may experience throttling for some formats\n'
- f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
+ f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
+ self.write_debug(e, only_once=True)
throttled = True
if itag:
@@ -3380,12 +3412,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Unquote should take place before split on comma (,) since textual
# fields may contain comma as well (see
# https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(
- compat_urllib_parse_unquote_plus(feed))
+ feed_data = urllib.parse.parse_qs(
+ urllib.parse.unquote_plus(feed))
def feed_entry(name):
return try_get(
- feed_data, lambda x: x[name][0], compat_str)
+ feed_data, lambda x: x[name][0], str)
feed_id = feed_entry('id')
if not feed_id:
@@ -3414,6 +3446,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or get_first(microformats, 'lengthSeconds')
or parse_duration(search_meta('duration'))) or None
+ if get_first(video_details, 'isPostLiveDvr'):
+ self.write_debug('Video is in Post-Live Manifestless mode')
+ if (duration or 0) > 4 * 3600:
+ self.report_warning(
+ 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
+ 'This is a known issue and patches are welcome')
+
live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
video_id, microformats, video_details, player_responses, player_url, duration)
@@ -3523,7 +3562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
'duration': duration,
'view_count': int_or_none(
get_first((video_details, microformats), (..., 'viewCount'))
@@ -3593,7 +3632,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'translated_subs' in self._configuration_arg('skip'):
continue
trans_code += f'-{lang_code}'
- trans_name += format_field(lang_name, template=' from %s')
+ trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
@@ -3605,9 +3644,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
- parsed_url = compat_urllib_parse_urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
+ query = urllib.parse.parse_qs(component)
for k, v in query.items():
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
@@ -3616,7 +3655,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Youtube Music Auto-generated description
if video_description:
- mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
+ mobj = re.search(
+ r'''(?xs)
+ (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
+ (?P<album>[^\n]+)
+ (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+ (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+ (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
+ .+\nAuto-generated\ by\ YouTube\.\s*$
+ ''', video_description)
if mobj:
release_year = mobj.group('release_year')
release_date = mobj.group('release_date')
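Note that under (?x) unescaped spaces inside the pattern are ignored, which is why the literal space in 'Released on' must be backslash-escaped. A cut-down version of the regex exercised against a made-up auto-generated description:

    import re

    description = ('Some Track · Some Artist\n\nSome Album\n\n'
                   '℗ 2020 Example Records\n\nReleased on: 2020-01-31\n\n'
                   'Auto-generated by YouTube.')
    mobj = re.search(r'''(?xs)
        (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
        (?P<album>[^\n]+)
        (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
        (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
        .+\nAuto-generated\ by\ YouTube\.\s*$
    ''', description)
    print(mobj.group('track').strip(), mobj.group('release_date'))  # Some Track 2020-01-31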
@@ -3634,9 +3681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = None
if webpage:
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id,
- 'yt initial data')
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not initial_data:
query = {'videoId': video_id}
query.update(self._get_checkok_params())
@@ -3646,13 +3691,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')
+ info['comment_count'] = traverse_obj(initial_data, (
+ 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
+ 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
+ ), (
+ 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
+ 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
+ ), expected_type=int_or_none, get_all=False)
+
try: # This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
except (KeyError, IndexError, TypeError):
pass
else:
info.setdefault('subtitles', {})['live_chat'] = [{
- 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
+ # url is needed to set cookies
+ 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
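The comment_count lookup above passes traverse_obj two alternative paths with get_all=False, so the first path that produces a value wins and the second is only consulted as a fallback. A toy demonstration of that behaviour, assuming yt-dlp's own traverse_obj:

    from yt_dlp.utils import traverse_obj

    data = {'panels': {'count': '42'}}
    print(traverse_obj(data, ('contents', 'count'), ('panels', 'count'), get_all=False))
    # '42' -- the first path misses, the second supplies the value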
@@ -3662,6 +3716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['chapters'] = (
self._extract_chapters_from_json(initial_data, duration)
or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or self._extract_chapters_from_description(video_description, duration)
or None)
contents = traverse_obj(
@@ -3884,7 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
if ep_url:
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
if ie.suitable(ep_url):
@@ -3928,7 +3983,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str)
+ str)
shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url:
# Skipping links to other channels; note that checking for
@@ -3988,7 +4043,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield entry
# playlist attachment
playlist_id = try_get(
- post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
@@ -3999,7 +4054,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if not isinstance(run, dict):
continue
ep_url = try_get(
- run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
if not ep_url:
continue
if not YoutubeIE.suitable(ep_url):
@@ -4015,9 +4070,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return
for content in contents:
renderer = content.get('backstagePostThreadRenderer')
- if not isinstance(renderer, dict):
+ if isinstance(renderer, dict):
+ yield from self._post_thread_entries(renderer)
continue
- yield from self._post_thread_entries(renderer)
+ renderer = content.get('videoRenderer')
+ if isinstance(renderer, dict):
+ yield self._video_entry(renderer)
r''' # unused
def _rich_grid_entries(self, contents):
@@ -4173,10 +4231,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
uploader['uploader'] = self._search_regex(
r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
uploader['uploader_id'] = try_get(
- owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str)
uploader['uploader_url'] = urljoin(
'https://www.youtube.com/',
- try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str))
return {k: v for k, v in uploader.items() if v is not None}
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
@@ -4304,13 +4362,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
title = playlist.get('title') or try_get(
- data, lambda x: x['titleText']['simpleText'], compat_str)
+ data, lambda x: x['titleText']['simpleText'], str)
playlist_id = playlist.get('playlistId') or item_id
# Delegating everything except mix playlists to regular tab-based playlist URL
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
@@ -4381,7 +4439,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue
nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
text = try_get(
- nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
+ nav_item_renderer, lambda x: x['text']['simpleText'], str)
if not text or text.lower() != 'show unavailable videos':
continue
browse_endpoint = try_get(
@@ -4402,7 +4460,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
note='Downloading API JSON with unavailable videos')
- @property
+ @functools.cached_property
def skip_webpage(self):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
@@ -4423,7 +4481,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
@@ -5236,8 +5294,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
- url = compat_urlparse.urlunparse(
- compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ url = urllib.parse.urlunparse(
+ urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
compat_opts = self.get_param('compat_opts', [])
def get_mobj(url):
@@ -5257,7 +5315,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
mdata = self._extract_tab_endpoint(
f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not murl:
raise ExtractorError('Failed to resolve album to playlist')
return self.url_result(murl, ie=YoutubeTabIE.ie_key())
@@ -5622,11 +5680,13 @@ class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
channel = traverse_obj(
notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
expected_type=str)
+ notification_title = self._get_text(notification, 'shortMessage')
+ if notification_title:
+ notification_title = notification_title.replace('\xad', '') # remove soft hyphens
+ # TODO: handle recommended videos
title = self._search_regex(
- rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'),
+ rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
'video title', default=None)
- if title:
- title = title.replace('\xad', '') # remove soft hyphens
upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
else None)
@@ -5778,7 +5838,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
if params:
section = next((k for k, v in self._SECTIONS.items() if v == params), params)
else:
- section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower()
+ section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
params = self._SECTIONS.get(section)
if not params:
section = None
@@ -5925,14 +5985,43 @@ class YoutubeTruncatedURLIE(InfoExtractor):
expected=True)
-class YoutubeClipIE(InfoExtractor):
+class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
IE_NAME = 'youtube:clip'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # FIXME: Other metadata should be extracted from the clip, not from the base video
+ 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'info_dict': {
+ 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'ext': 'mp4',
+ 'section_start': 29.0,
+ 'section_end': 39.7,
+ 'duration': 10.7,
+ }
+ }]
def _real_extract(self, url):
- self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
- return self.url_result(url, 'Generic')
+ clip_id = self._match_id(url)
+ _, data = self._extract_webpage(url, clip_id)
+
+ video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
+ if not video_id:
+ raise ExtractorError('Unable to find video ID')
+
+ clip_data = traverse_obj(data, (
+ 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
+ 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
+ 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
+ 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://www.youtube.com/watch?v={video_id}',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': clip_id,
+ 'section_start': int(clip_data['startTimeMs']) / 1000,
+ 'section_end': int(clip_data['endTimeMs']) / 1000,
+ }
class YoutubeTruncatedIDIE(InfoExtractor):
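The clip extractor now resolves to the underlying watch page through a url_transparent result: YoutubeIE extracts the base video, and the clip's own fields (id, section_start, section_end) are overlaid on top, trimming playback to the clipped range. Schematically, with made-up values:

    # url_transparent delegates extraction to 'url' and then merges
    # these fields over whatever the delegated extractor returns.
    clip_info = {
        '_type': 'url_transparent',
        'url': 'https://www.youtube.com/watch?v=EXAMPLE00000',  # hypothetical base video
        'ie_key': 'Youtube',
        'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
        'section_start': 29000 / 1000,  # clip times arrive in milliseconds
        'section_end': 39700 / 1000,
    }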
diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
index 16f827a7e..2a7e85472 100644
--- a/yt_dlp/extractor/zattoo.py
+++ b/yt_dlp/extractor/zattoo.py
@@ -220,7 +220,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
'id': channel_name,
'title': channel_name,
'is_live': True,
- 'format': formats,
+ 'formats': formats,
'subtitles': subtitles
}
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index a388ff562..3a7f01f7a 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -69,6 +69,7 @@ class ZDFBaseIE(InfoExtractor):
f.update({
'url': format_url,
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
})
new_formats = [f]
formats.extend(merge_dicts(f, {
@@ -108,7 +109,7 @@ class ZDFBaseIE(InfoExtractor):
'class': track.get('class'),
'language': track.get('language'),
})
- self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference'))
+ self._sort_formats(formats, ('tbr', 'res', 'quality', 'language_preference'))
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
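The added tbr (total bitrate, in kbit/s) is read straight out of ZDF's rate-coded file names and then promoted to the primary sort key, so HTTP variants order by actual bitrate rather than by quality label. A quick check of the pattern against a made-up URL of that shape:

    import re

    url = 'https://example.com/zdf/some_clip_3328k_p36v15.mp4'  # hypothetical
    print(re.search(r'_(\d+)k_', url).group(1))  # 3328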
@@ -187,7 +188,7 @@ class ZDFIE(ZDFBaseIE):
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '3d6f1049e9682178a11c54b91f3dd065',
+ 'md5': '57af4423db0455a3975d2dc4578536bc',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
@@ -230,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
'timestamp': 1641355200,
'upload_date': '20220105',
},
+ 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+ }, {
+ 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+ 'info_dict': {
+ 'id': '191205_1800_sendung_sok8',
+ 'ext': 'mp4',
+ 'title': 'Das Geld anderer Leute',
+ 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+ 'duration': 2581.0,
+ 'timestamp': 1654790700,
+ 'upload_date': '20220609',
+ 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+ },
}]
def _extract_entry(self, url, player, content, video_id):
diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
index 70eb3ccd1..d8d259dd6 100644
--- a/yt_dlp/extractor/zhihu.py
+++ b/yt_dlp/extractor/zhihu.py
@@ -58,7 +58,7 @@ class ZhihuIE(InfoExtractor):
'uploader': author.get('name'),
'timestamp': int_or_none(zvideo.get('published_at')),
'uploader_id': author.get('id'),
- 'uploader_url': format_field(url_token, template='https://www.zhihu.com/people/%s'),
+ 'uploader_url': format_field(url_token, None, 'https://www.zhihu.com/people/%s'),
'duration': float_or_none(video.get('duration')),
'view_count': int_or_none(zvideo.get('play_count')),
'like_count': int_or_none(zvideo.get('liked_count')),