Diffstat (limited to 'yt_dlp/extractor')
132 files changed, 5596 insertions, 3950 deletions
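The headline change, shown in the first diff below: yt_dlp/extractor/__init__.py no longer imports every extractor class eagerly (and no longer does its own lazy_extractors/plugin bootstrapping); it instead forwards attribute access to the new .extractors module via passthrough_module. A minimal sketch of the underlying idea, built on the module-level __getattr__ hook from PEP 562; this is an illustration only, not yt-dlp's actual passthrough_module, which lives in yt_dlp/compat/compat_utils.py and handles more cases:

import importlib
import sys


def passthrough_module(parent, child):
    # Forward failed attribute lookups on the `parent` module to `child`.
    # The child module is imported lazily, on first attribute access, which
    # is the point: declaring ~2200 extractor names costs nothing until
    # something actually needs one of them.
    parent_module = sys.modules[parent]

    def __getattr__(name):
        child_module = importlib.import_module(child, parent)
        return getattr(child_module, name)

    parent_module.__getattr__ = __getattr__

With this in place, from yt_dlp.extractor import YoutubeIE still resolves as before, and the del passthrough_module seen in the diff simply keeps the helper itself from leaking into the package namespace.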
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index afd3d05ac..6bfa4bd7b 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
-import os
+from ..compat.compat_utils import passthrough_module
 
-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
-    with contextlib.suppress(ImportError):
-        from .lazy_extractors import *  # noqa: F403
-        from .lazy_extractors import _ALL_CLASSES
-        _LAZY_LOADER = True
-
-if not _LAZY_LOADER:
-    from .extractors import *  # noqa: F403
-    _ALL_CLASSES = [  # noqa: F811
-        klass
-        for name, klass in globals().items()
-        if name.endswith('IE') and name != 'GenericIE'
-    ]
-    _ALL_CLASSES.append(GenericIE)  # noqa: F405
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
 
 
 def gen_extractor_classes():
     """ Return a list of supported extractors.
     The order does matter; the first extractor matched is the one handling the URL.
     """
+    from .extractors import _ALL_CLASSES
+
     return _ALL_CLASSES
 
@@ -39,10 +22,12 @@ def gen_extractors():
 
 def list_extractor_classes(age_limit=None):
     """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+    from .generic import GenericIE
+
     yield from sorted(filter(
-        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,  # noqa: F405
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
         gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
-    yield GenericIE  # noqa: F405
+    yield GenericIE
 
 
 def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
 
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name + 'IE']
+    from .
import extractors + + return getattr(extractors, f'{ie_name}IE') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py new file mode 100644 index 000000000..37328dfc8 --- /dev/null +++ b/yt_dlp/extractor/_extractors.py @@ -0,0 +1,2198 @@ +# flake8: noqa: F401 + +from .abc import ( + ABCIE, + ABCIViewIE, + ABCIViewShowSeriesIE, +) +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) +from .abcotvs import ( + ABCOTVSIE, + ABCOTVSClipsIE, +) +from .abematv import ( + AbemaTVIE, + AbemaTVTitleIE, +) +from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) +from .adn import ADNIE +from .adobeconnect import AdobeConnectIE +from .adobetv import ( + AdobeTVEmbedIE, + AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, + AdobeTVVideoIE, +) +from .adultswim import AdultSwimIE +from .aenetworks import ( + AENetworksIE, + AENetworksCollectionIE, + AENetworksShowIE, + HistoryTopicIE, + HistoryPlayerIE, + BiographyIE, +) +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVLiveIE, + AfreecaTVUserIE, +) +from .airmozilla import AirMozillaIE +from .aljazeera import AlJazeeraIE +from .alphaporno import AlphaPornoIE +from .amara import AmaraIE +from .alura import ( + AluraIE, + AluraCourseIE +) +from .amcnetworks import AMCNetworksIE +from .amazon import AmazonStoreIE +from .americastestkitchen import ( + AmericasTestKitchenIE, + AmericasTestKitchenSeasonIE, +) +from .animeondemand import AnimeOnDemandIE +from .anvato import AnvatoIE +from .aol import AolIE +from .allocine import AllocineIE +from .aliexpress import AliExpressLiveIE +from .alsace20tv import ( + Alsace20TVIE, + Alsace20TVEmbedIE, +) +from .apa import APAIE +from .aparat import AparatIE +from .appleconnect import AppleConnectIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) +from .applepodcasts import ApplePodcastsIE +from .archiveorg import ( + ArchiveOrgIE, + YoutubeWebArchiveIE, +) +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .ard import ( + ARDBetaMediathekIE, + ARDIE, + ARDMediathekIE, +) +from .arte import ( + ArteTVIE, + ArteTVEmbedIE, + ArteTVPlaylistIE, + ArteTVCategoryIE, +) +from .arnes import ArnesIE +from .asiancrush import ( + AsianCrushIE, + AsianCrushPlaylistIE, +) +from .atresplayer import AtresPlayerIE +from .atscaleconf import AtScaleConfEventIE +from .atttechchannel import ATTTechChannelIE +from .atvat import ATVAtIE +from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE +from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audius import ( + AudiusIE, + AudiusTrackIE, + AudiusPlaylistIE, + AudiusProfileIE, +) +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) +from .azmedien import AZMedienIE +from .baidu import BaiduVideoIE +from .banbye import ( + BanByeIE, + BanByeChannelIE, +) +from .bandaichannel import BandaiChannelIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) +from .bannedvideo import BannedVideoIE +from .bbc import ( + BBCCoUkIE, + BBCCoUkArticleIE, + BBCCoUkIPlayerEpisodesIE, + BBCCoUkIPlayerGroupIE, + BBCCoUkPlaylistIE, + BBCIE, +) +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE +from .beatport import BeatportIE +from .bet import BetIE +from .bfi import BFIPlayerIE +from .bfmtv import ( + BFMTVIE, + BFMTVLiveIE, + BFMTVArticleIE, +) +from .bibeltv import BibelTVIE +from .bigflix import BigflixIE +from .bigo import 
BigoIE +from .bild import BildIE +from .bilibili import ( + BiliBiliIE, + BiliBiliSearchIE, + BilibiliCategoryIE, + BiliBiliBangumiIE, + BilibiliAudioIE, + BilibiliAudioAlbumIE, + BiliBiliPlayerIE, + BilibiliChannelIE, + BiliIntlIE, + BiliIntlSeriesIE, + BiliLiveIE, +) +from .biobiochiletv import BioBioChileTVIE +from .bitchute import ( + BitChuteIE, + BitChuteChannelIE, +) +from .bitwave import ( + BitwaveReplayIE, + BitwaveStreamIE, +) +from .biqle import BIQLEIE +from .blackboardcollaborate import BlackboardCollaborateIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) +from .blogger import BloggerIE +from .bloomberg import BloombergIE +from .bokecc import BokeCCIE +from .bongacams import BongaCamsIE +from .bostonglobe import BostonGlobeIE +from .box import BoxIE +from .bpb import BpbIE +from .br import ( + BRIE, + BRMediathekIE, +) +from .bravotv import BravoTVIE +from .breakcom import BreakIE +from .breitbart import BreitBartIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) +from .businessinsider import BusinessInsiderIE +from .buzzfeed import BuzzFeedIE +from .byutv import BYUtvIE +from .c56 import C56IE +from .cableav import CableAVIE +from .callin import CallinIE +from .caltrans import CaltransIE +from .cam4 import CAM4IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) +from .cammodels import CamModelsIE +from .camwithher import CamWithHerIE +from .canalalpha import CanalAlphaIE +from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE +from .canvas import ( + CanvasIE, + CanvasEenIE, + VrtNUIE, + DagelijkseKostIE, +) +from .carambatv import ( + CarambaTVIE, + CarambaTVPageIE, +) +from .cartoonnetwork import CartoonNetworkIE +from .cbc import ( + CBCIE, + CBCPlayerIE, + CBCGemIE, + CBCGemPlaylistIE, + CBCGemLiveIE, +) +from .cbs import CBSIE +from .cbslocal import ( + CBSLocalIE, + CBSLocalArticleIE, +) +from .cbsinteractive import CBSInteractiveIE +from .cbsnews import ( + CBSNewsEmbedIE, + CBSNewsIE, + CBSNewsLiveVideoIE, +) +from .cbssports import ( + CBSSportsEmbedIE, + CBSSportsIE, + TwentyFourSevenSportsIE, +) +from .ccc import ( + CCCIE, + CCCPlaylistIE, +) +from .ccma import CCMAIE +from .cctv import CCTVIE +from .cda import CDAIE +from .ceskatelevize import CeskaTelevizeIE +from .cgtn import CGTNIE +from .channel9 import Channel9IE +from .charlierose import CharlieRoseIE +from .chaturbate import ChaturbateIE +from .chilloutzone import ChilloutzoneIE +from .chingari import ( + ChingariIE, + ChingariUserIE, +) +from .chirbit import ( + ChirbitIE, + ChirbitProfileIE, +) +from .cinchcast import CinchcastIE +from .cinemax import CinemaxIE +from .ciscolive import ( + CiscoLiveSessionIE, + CiscoLiveSearchIE, +) +from .ciscowebex import CiscoWebexIE +from .cjsw import CJSWIE +from .cliphunter import CliphunterIE +from .clippit import ClippitIE +from .cliprs import ClipRsIE +from .clipsyndicate import ClipsyndicateIE +from .closertotruth import CloserToTruthIE +from .cloudflarestream import CloudflareStreamIE +from .cloudy import CloudyIE +from .clubic import ClubicIE +from .clyp import ClypIE +from .cmt import CMTIE +from .cnbc import ( + CNBCIE, + CNBCVideoIE, +) +from .cnn import ( + CNNIE, + CNNBlogsIE, + CNNArticleIE, +) +from .coub import CoubIE +from .comedycentral import ( + ComedyCentralIE, + ComedyCentralTVIE, +) +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonprotocols import ( + MmsIE, + RtmpIE, + ViewSourceIE, +) +from .condenast import CondeNastIE +from .contv import 
CONtvIE +from .corus import CorusIE +from .cpac import ( + CPACIE, + CPACPlaylistIE, +) +from .cozytv import CozyTVIE +from .cracked import CrackedIE +from .crackle import CrackleIE +from .craftsy import CraftsyIE +from .crooksandliars import CrooksAndLiarsIE +from .crowdbunker import ( + CrowdBunkerIE, + CrowdBunkerChannelIE, +) +from .crunchyroll import ( + CrunchyrollIE, + CrunchyrollShowPlaylistIE, + CrunchyrollBetaIE, + CrunchyrollBetaShowIE, +) +from .cspan import CSpanIE, CSpanCongressIE +from .ctsnews import CtsNewsIE +from .ctv import CTVIE +from .ctvnews import CTVNewsIE +from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, +) +from .cwtv import CWTVIE +from .cybrary import ( + CybraryIE, + CybraryCourseIE +) +from .daftsex import DaftsexIE +from .dailymail import DailyMailIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, +) +from .dailywire import ( + DailyWireIE, + DailyWirePodcastIE, +) +from .damtomo import ( + DamtomoRecordIE, + DamtomoVideoIE, +) +from .daum import ( + DaumIE, + DaumClipIE, + DaumPlaylistIE, + DaumUserIE, +) +from .daystar import DaystarClipIE +from .dbtv import DBTVIE +from .dctp import DctpTvIE +from .deezer import ( + DeezerPlaylistIE, + DeezerAlbumIE, +) +from .democracynow import DemocracynowIE +from .dfb import DFBIE +from .dhm import DHMIE +from .digg import DiggIE +from .dotsub import DotsubIE +from .douyutv import ( + DouyuShowIE, + DouyuTVIE, +) +from .dplay import ( + DPlayIE, + DiscoveryPlusIE, + HGTVDeIE, + GoDiscoveryIE, + TravelChannelIE, + CookingChannelIE, + HGTVUsaIE, + FoodNetworkIE, + InvestigationDiscoveryIE, + DestinationAmericaIE, + AmHistoryChannelIE, + ScienceChannelIE, + DIYNetworkIE, + DiscoveryLifeIE, + AnimalPlanetIE, + TLCIE, + DiscoveryPlusIndiaIE, + DiscoveryNetworksDeIE, + DiscoveryPlusItalyIE, + DiscoveryPlusItalyShowIE, + DiscoveryPlusIndiaShowIE, +) +from .dreisat import DreiSatIE +from .drbonanza import DRBonanzaIE +from .drtuber import DrTuberIE +from .drtv import ( + DRTVIE, + DRTVLiveIE, +) +from .dtube import DTubeIE +from .dvtv import DVTVIE +from .duboku import ( + DubokuIE, + DubokuPlaylistIE +) +from .dumpert import DumpertIE +from .defense import DefenseGouvFrIE +from .digitalconcerthall import DigitalConcertHallIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE +from .doodstream import DoodStreamIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutSeasonIE, + DropoutIE +) +from .dw import ( + DWIE, + DWArticleIE, +) +from .eagleplatform import EaglePlatformIE +from .ebaumsworld import EbaumsWorldIE +from .echomsk import EchoMskIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) +from .ehow import EHowIE +from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE +from .eitb import EitbIE +from .ellentube import ( + EllenTubeIE, + EllenTubeVideoIE, + EllenTubePlaylistIE, +) +from .elonet import ElonetIE +from .elpais import ElPaisIE +from .embedly import EmbedlyIE +from .engadget import EngadgetIE +from .epicon import ( + EpiconIE, + EpiconSeriesIE, +) +from .eporner import EpornerIE +from .eroprofile import ( + EroProfileIE, + EroProfileAlbumIE, +) +from .ertgr import ( + ERTFlixCodenameIE, + ERTFlixIE, + ERTWebtvEmbedIE, +) +from .escapist import EscapistIE +from .espn import ( + ESPNIE, + WatchESPNIE, + ESPNArticleIE, + FiveThirtyEightIE, + 
ESPNCricInfoIE, +) +from .esri import EsriVideoIE +from .europa import EuropaIE +from .europeantour import EuropeanTourIE +from .euscreen import EUScreenIE +from .expotv import ExpoTVIE +from .expressen import ExpressenIE +from .extremetube import ExtremeTubeIE +from .eyedotv import EyedoTVIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, + FacebookRedirectURLIE, +) +from .fancode import ( + FancodeVodIE, + FancodeLiveIE +) + +from .faz import FazIE +from .fc2 import ( + FC2IE, + FC2EmbedIE, + FC2LiveIE, +) +from .fczenit import FczenitIE +from .fifa import FifaIE +from .filmmodu import FilmmoduIE +from .filmon import ( + FilmOnIE, + FilmOnChannelIE, +) +from .filmweb import FilmwebIE +from .firsttv import FirstTVIE +from .fivetv import FiveTVIE +from .flickr import FlickrIE +from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE +from .formula1 import Formula1IE +from .fourtube import ( + FourTubeIE, + PornTubeIE, + PornerBrosIE, + FuxIE, +) +from .fourzerostudio import ( + FourZeroStudioArchiveIE, + FourZeroStudioClipIE, +) +from .fox import FOXIE +from .fox9 import ( + FOX9IE, + FOX9NewsIE, +) +from .foxgay import FoxgayIE +from .foxnews import ( + FoxNewsIE, + FoxNewsArticleIE, +) +from .foxsports import FoxSportsIE +from .fptplay import FptplayIE +from .franceinter import FranceInterIE +from .francetv import ( + FranceTVIE, + FranceTVSiteIE, + FranceTVInfoIE, +) +from .freesound import FreesoundIE +from .freespeech import FreespeechIE +from .frontendmasters import ( + FrontendMastersIE, + FrontendMastersLessonIE, + FrontendMastersCourseIE +) +from .freetv import ( + FreeTvIE, + FreeTvMoviesIE, +) +from .fujitv import FujiTVFODPlus7IE +from .funimation import ( + FunimationIE, + FunimationPageIE, + FunimationShowIE, +) +from .funk import FunkIE +from .fusion import FusionIE +from .fuyintv import FuyinTVIE +from .gab import ( + GabTVIE, + GabIE, +) +from .gaia import GaiaIE +from .gameinformer import GameInformerIE +from .gamejolt import ( + GameJoltIE, + GameJoltUserIE, + GameJoltGameIE, + GameJoltGameSoundtrackIE, + GameJoltCommunityIE, + GameJoltSearchIE, +) +from .gamespot import GameSpotIE +from .gamestar import GameStarIE +from .gaskrank import GaskrankIE +from .gazeta import GazetaIE +from .gdcvault import GDCVaultIE +from .gedidigital import GediDigitalIE +from .generic import GenericIE +from .gettr import ( + GettrIE, + GettrStreamingIE, +) +from .gfycat import GfycatIE +from .giantbomb import GiantBombIE +from .giga import GigaIE +from .glide import GlideIE +from .globo import ( + GloboIE, + GloboArticleIE, +) +from .go import GoIE +from .godtube import GodTubeIE +from .gofile import GofileIE +from .golem import GolemIE +from .goodgame import GoodGameIE +from .googledrive import ( + GoogleDriveIE, + GoogleDriveFolderIE, +) +from .googlepodcasts import ( + GooglePodcastsIE, + GooglePodcastsFeedIE, +) +from .googlesearch import GoogleSearchIE +from .gopro import GoProIE +from .goshgay import GoshgayIE +from .gotostage import GoToStageIE +from .gputechconf import GPUTechConfIE +from .gronkh import ( + GronkhIE, + GronkhFeedIE, + GronkhVodsIE +) +from .groupon import GrouponIE +from .hbo import HBOIE +from .hearthisat import HearThisAtIE +from .heise import HeiseIE +from .hellporno import HellPornoIE +from .helsinki import HelsinkiIE +from .hentaistigma import HentaiStigmaIE +from .hgtv import HGTVComShowIE +from .hketv import HKETVIE +from .hidive import HiDiveIE +from .historicfilms import HistoricFilmsIE +from .hitbox import HitboxIE, 
HitboxLiveIE +from .hitrecord import HitRecordIE +from .hotnewhiphop import HotNewHipHopIE +from .hotstar import ( + HotStarIE, + HotStarPrefixIE, + HotStarPlaylistIE, + HotStarSeriesIE, +) +from .howcast import HowcastIE +from .howstuffworks import HowStuffWorksIE +from .hrfensehen import HRFernsehenIE +from .hrti import ( + HRTiIE, + HRTiPlaylistIE, +) +from .hse import ( + HSEShowIE, + HSEProductIE, +) +from .huajiao import HuajiaoIE +from .huya import HuyaLiveIE +from .huffpost import HuffPostIE +from .hungama import ( + HungamaIE, + HungamaSongIE, + HungamaAlbumPlaylistIE, +) +from .hypem import HypemIE +from .icareus import IcareusIE +from .ichinanalive import ( + IchinanaLiveIE, + IchinanaLiveClipIE, +) +from .ign import ( + IGNIE, + IGNVideoIE, + IGNArticleIE, +) +from .iheart import ( + IHeartRadioIE, + IHeartRadioPodcastIE, +) +from .imdb import ( + ImdbIE, + ImdbListIE +) +from .imgur import ( + ImgurIE, + ImgurAlbumIE, + ImgurGalleryIE, +) +from .ina import InaIE +from .inc import IncIE +from .indavideo import IndavideoEmbedIE +from .infoq import InfoQIE +from .instagram import ( + InstagramIE, + InstagramIOSIE, + InstagramUserIE, + InstagramTagIE, + InstagramStoryIE, +) +from .internazionale import InternazionaleIE +from .internetvideoarchive import InternetVideoArchiveIE +from .iprima import ( + IPrimaIE, + IPrimaCNNIE +) +from .iqiyi import ( + IqiyiIE, + IqIE, + IqAlbumIE +) +from .itprotv import ( + ITProTVIE, + ITProTVCourseIE +) +from .itv import ( + ITVIE, + ITVBTCCIE, +) +from .ivi import ( + IviIE, + IviCompilationIE +) +from .ivideon import IvideonIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) +from .ixigua import IxiguaIE +from .izlesene import IzleseneIE +from .jable import ( + JableIE, + JablePlaylistIE, +) +from .jamendo import ( + JamendoIE, + JamendoAlbumIE, +) +from .jeuxvideo import JeuxVideoIE +from .jove import JoveIE +from .joj import JojIE +from .jwplatform import JWPlatformIE +from .kakao import KakaoIE +from .kaltura import KalturaIE +from .karaoketv import KaraoketvIE +from .karrierevideos import KarriereVideosIE +from .keezmovies import KeezMoviesIE +from .kelbyone import KelbyOneIE +from .ketnet import KetnetIE +from .khanacademy import ( + KhanAcademyIE, + KhanAcademyUnitIE, +) +from .kicker import KickerIE +from .kickstarter import KickStarterIE +from .kinja import KinjaEmbedIE +from .kinopoisk import KinoPoiskIE +from .konserthusetplay import KonserthusetPlayIE +from .koo import KooIE +from .kth import KTHIE +from .krasview import KrasViewIE +from .ku6 import Ku6IE +from .kusi import KUSIIE +from .kuwo import ( + KuwoIE, + KuwoAlbumIE, + KuwoChartIE, + KuwoSingerIE, + KuwoCategoryIE, + KuwoMvIE, +) +from .la7 import ( + LA7IE, + LA7PodcastEpisodeIE, + LA7PodcastIE, +) +from .laola1tv import ( + Laola1TvEmbedIE, + Laola1TvIE, + EHFTVIE, + ITTFIE, +) +from .lastfm import ( + LastFMIE, + LastFMPlaylistIE, + LastFMUserIE, +) +from .lbry import ( + LBRYIE, + LBRYChannelIE, +) +from .lci import LCIIE +from .lcp import ( + LcpPlayIE, + LcpIE, +) +from .lecture2go import Lecture2GoIE +from .lecturio import ( + LecturioIE, + LecturioCourseIE, + LecturioDeCourseIE, +) +from .leeco import ( + LeIE, + LePlaylistIE, + LetvCloudIE, +) +from .lego import LEGOIE +from .lemonde import LemondeIE +from .lenta import LentaIE +from .libraryofcongress import LibraryOfCongressIE +from .libsyn import LibsynIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) +from .likee import ( + LikeeIE, + LikeeUserIE +) +from .limelight import ( + 
LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, +) +from .line import ( + LineLiveIE, + LineLiveChannelIE, +) +from .linkedin import ( + LinkedInIE, + LinkedInLearningIE, + LinkedInLearningCourseIE, +) +from .linuxacademy import LinuxAcademyIE +from .litv import LiTVIE +from .livejournal import LiveJournalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) +from .lnkgo import ( + LnkGoIE, + LnkIE, +) +from .localnews8 import LocalNews8IE +from .lovehomeporn import LoveHomePornIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .m6 import M6IE +from .magentamusik360 import MagentaMusik360IE +from .mailru import ( + MailRuIE, + MailRuMusicIE, + MailRuMusicSearchIE, +) +from .mainstreaming import MainStreamingIE +from .malltv import MallTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) +from .manoto import ( + ManotoTVIE, + ManotoTVShowIE, + ManotoTVLiveIE, +) +from .manyvids import ManyVidsIE +from .maoritv import MaoriTVIE +from .markiza import ( + MarkizaIE, + MarkizaPageIE, +) +from .massengeschmacktv import MassengeschmackTVIE +from .masters import MastersIE +from .matchtv import MatchTVIE +from .mdr import MDRIE +from .medaltv import MedalTVIE +from .mediaite import MediaiteIE +from .mediaklikk import MediaKlikkIE +from .mediaset import ( + MediasetIE, + MediasetShowIE, +) +from .mediasite import ( + MediasiteIE, + MediasiteCatalogIE, + MediasiteNamedCatalogIE, +) +from .medici import MediciIE +from .megaphone import MegaphoneIE +from .meipai import MeipaiIE +from .melonvod import MelonVODIE +from .meta import METAIE +from .metacafe import MetacafeIE +from .metacritic import MetacriticIE +from .mgoon import MgoonIE +from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE +from .microsoftstream import MicrosoftStreamIE +from .microsoftvirtualacademy import ( + MicrosoftVirtualAcademyIE, + MicrosoftVirtualAcademyCourseIE, +) +from .mildom import ( + MildomIE, + MildomVodIE, + MildomClipIE, + MildomUserVodIE, +) +from .minds import ( + MindsIE, + MindsChannelIE, + MindsGroupIE, +) +from .ministrygrid import MinistryGridIE +from .minoto import MinotoIE +from .miomio import MioMioIE +from .mirrativ import ( + MirrativIE, + MirrativUserIE, +) +from .mirrorcouk import MirrorCoUKIE +from .mit import TechTVMITIE, OCWMITIE +from .mitele import MiTeleIE +from .mixch import ( + MixchIE, + MixchArchiveIE, +) +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, +) +from .mlb import ( + MLBIE, + MLBVideoIE, +) +from .mlssoccer import MLSSoccerIE +from .mnet import MnetIE +from .moevideo import MoeVideoIE +from .mofosex import ( + MofosexIE, + MofosexEmbedIE, +) +from .mojvideo import MojvideoIE +from .morningstar import MorningstarIE +from .motherless import ( + MotherlessIE, + MotherlessGroupIE +) +from .motorsport import MotorsportIE +from .movieclips import MovieClipsIE +from .moviepilot import MoviepilotIE +from .moviezine import MoviezineIE +from .movingimage import MovingImageIE +from .msn import MSNIE +from .mtv import ( + MTVIE, + MTVVideoIE, + MTVServicesEmbeddedIE, + MTVDEIE, + MTVJapanIE, + MTVItaliaIE, + MTVItaliaProgrammaIE, +) +from .muenchentv import MuenchenTVIE +from .murrtube import MurrtubeIE, MurrtubeUserIE +from .musescore import MuseScoreIE +from .musicdex import ( + MusicdexSongIE, + MusicdexAlbumIE, + MusicdexArtistIE, + MusicdexPlaylistIE, +) +from .mwave import MwaveIE, MwaveMeetGreetIE +from .mxplayer 
import ( + MxplayerIE, + MxplayerShowIE, +) +from .mychannels import MyChannelsIE +from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspass import MySpassIE +from .myvi import ( + MyviIE, + MyviEmbedIE, +) +from .myvideoge import MyVideoGeIE +from .myvidster import MyVidsterIE +from .n1 import ( + N1InfoAssetIE, + N1InfoIIE, +) +from .nate import ( + NateIE, + NateProgramIE, +) +from .nationalgeographic import ( + NationalGeographicVideoIE, + NationalGeographicTVIE, +) +from .naver import ( + NaverIE, + NaverLiveIE, + NaverNowIE, +) +from .nba import ( + NBAWatchEmbedIE, + NBAWatchIE, + NBAWatchCollectionIE, + NBAEmbedIE, + NBAIE, + NBAChannelIE, +) +from .nbc import ( + NBCIE, + NBCNewsIE, + NBCOlympicsIE, + NBCOlympicsStreamIE, + NBCSportsIE, + NBCSportsStreamIE, + NBCSportsVPlayerIE, +) +from .ndr import ( + NDRIE, + NJoyIE, + NDREmbedBaseIE, + NDREmbedIE, + NJoyEmbedIE, +) +from .ndtv import NDTVIE +from .nebula import ( + NebulaIE, + NebulaSubscriptionsIE, + NebulaChannelIE, +) +from .nerdcubed import NerdCubedFeedIE +from .netzkino import NetzkinoIE +from .neteasemusic import ( + NetEaseMusicIE, + NetEaseMusicAlbumIE, + NetEaseMusicSingerIE, + NetEaseMusicListIE, + NetEaseMusicMvIE, + NetEaseMusicProgramIE, + NetEaseMusicDjRadioIE, +) +from .netverse import ( + NetverseIE, + NetversePlaylistIE, +) +from .newgrounds import ( + NewgroundsIE, + NewgroundsPlaylistIE, + NewgroundsUserIE, +) +from .newstube import NewstubeIE +from .newsy import NewsyIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyIE, + NextTVIE, +) +from .nexx import ( + NexxIE, + NexxEmbedIE, +) +from .nfb import NFBIE +from .nfhsnetwork import NFHSNetworkIE +from .nfl import ( + NFLIE, + NFLArticleIE, +) +from .nhk import ( + NhkVodIE, + NhkVodProgramIE, + NhkForSchoolBangumiIE, + NhkForSchoolSubjectIE, + NhkForSchoolProgramListIE, +) +from .nhl import NHLIE +from .nick import ( + NickIE, + NickBrIE, + NickDeIE, + NickNightIE, + NickRuIE, +) +from .niconico import ( + NiconicoIE, + NiconicoPlaylistIE, + NiconicoUserIE, + NiconicoSeriesIE, + NiconicoHistoryIE, + NicovideoSearchDateIE, + NicovideoSearchIE, + NicovideoSearchURLIE, + NicovideoTagURLIE, +) +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) +from .ninegag import NineGagIE +from .ninenow import NineNowIE +from .nintendo import NintendoIE +from .nitter import NitterIE +from .njpwworld import NJPWWorldIE +from .nobelprize import NobelPrizeIE +from .nonktube import NonkTubeIE +from .noodlemagazine import NoodleMagazineIE +from .noovo import NoovoIE +from .normalboots import NormalbootsIE +from .nosvideo import NosVideoIE +from .nova import ( + NovaEmbedIE, + NovaIE, +) +from .novaplay import NovaPlayIE +from .nowness import ( + NownessIE, + NownessPlaylistIE, + NownessSeriesIE, +) +from .noz import NozIE +from .npo import ( + AndereTijdenIE, + NPOIE, + NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, + SchoolTVIE, + HetKlokhuisIE, + VPROIE, + WNLIE, +) +from .npr import NprIE +from .nrk import ( + NRKIE, + NRKPlaylistIE, + NRKSkoleIE, + NRKTVIE, + NRKTVDirekteIE, + NRKRadioPodkastIE, + NRKTVEpisodeIE, + NRKTVEpisodesIE, + NRKTVSeasonIE, + NRKTVSeriesIE, +) +from .nrl import NRLTVIE +from .ntvcojp import NTVCoJpCUIE +from .ntvde import NTVDeIE +from .ntvru import NTVRuIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, + NYTimesCookingIE, +) +from .nuvid import NuvidIE +from .nzherald import NZHeraldIE +from .nzz import NZZIE +from .odatv import OdaTVIE +from .odnoklassniki import OdnoklassnikiIE 
+from .oktoberfesttv import OktoberfestTVIE +from .olympics import OlympicsReplayIE +from .on24 import On24IE +from .ondemandkorea import OnDemandKoreaIE +from .onefootball import OneFootballIE +from .onet import ( + OnetIE, + OnetChannelIE, + OnetMVPIE, + OnetPlIE, +) +from .onionstudios import OnionStudiosIE +from .ooyala import ( + OoyalaIE, + OoyalaExternalIE, +) +from .opencast import ( + OpencastIE, + OpencastPlaylistIE, +) +from .openrec import ( + OpenRecIE, + OpenRecCaptureIE, + OpenRecMovieIE, +) +from .ora import OraTVIE +from .orf import ( + ORFTVthekIE, + ORFFM4IE, + ORFFM4StoryIE, + ORFOE1IE, + ORFOE3IE, + ORFNOEIE, + ORFWIEIE, + ORFBGLIE, + ORFOOEIE, + ORFSTMIE, + ORFKTNIE, + ORFSBGIE, + ORFTIRIE, + ORFVBGIE, + ORFIPTVIE, +) +from .outsidetv import OutsideTVIE +from .packtpub import ( + PacktPubIE, + PacktPubCourseIE, +) +from .palcomp3 import ( + PalcoMP3IE, + PalcoMP3ArtistIE, + PalcoMP3VideoIE, +) +from .pandoratv import PandoraTVIE +from .panopto import ( + PanoptoIE, + PanoptoListIE, + PanoptoPlaylistIE +) +from .paramountplus import ( + ParamountPlusIE, + ParamountPlusSeriesIE, +) +from .parliamentliveuk import ParliamentLiveUKIE +from .parlview import ParlviewIE +from .patreon import ( + PatreonIE, + PatreonUserIE +) +from .pbs import PBSIE +from .pearvideo import PearVideoIE +from .peekvids import PeekVidsIE, PlayVidsIE +from .peertube import ( + PeerTubeIE, + PeerTubePlaylistIE, +) +from .peertv import PeerTVIE +from .peloton import ( + PelotonIE, + PelotonLiveIE +) +from .people import PeopleIE +from .performgroup import PerformGroupIE +from .periscope import ( + PeriscopeIE, + PeriscopeUserIE, +) +from .philharmoniedeparis import PhilharmonieDeParisIE +from .phoenix import PhoenixIE +from .photobucket import PhotobucketIE +from .piapro import PiaproIE +from .picarto import ( + PicartoIE, + PicartoVodIE, +) +from .piksel import PikselIE +from .pinkbike import PinkbikeIE +from .pinterest import ( + PinterestIE, + PinterestCollectionIE, +) +from .pixivsketch import ( + PixivSketchIE, + PixivSketchUserIE, +) +from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) +from .playfm import PlayFMIE +from .playplustv import PlayPlusTVIE +from .plays import PlaysTVIE +from .playstuff import PlayStuffIE +from .playsuisse import PlaySuisseIE +from .playtvak import PlaytvakIE +from .playvid import PlayvidIE +from .playwire import PlaywireIE +from .plutotv import PlutoTVIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) +from .podchaser import PodchaserIE +from .podomatic import PodomaticIE +from .pokemon import ( + PokemonIE, + PokemonWatchIE, +) +from .pokergo import ( + PokerGoIE, + PokerGoCollectionIE, +) +from .polsatgo import PolsatGoIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioCategoryIE, + PolskieRadioPlayerIE, + PolskieRadioPodcastIE, + PolskieRadioPodcastListIE, + PolskieRadioRadioKierowcowIE, +) +from .popcorntimes import PopcorntimesIE +from .popcorntv import PopcornTVIE +from .porn91 import Porn91IE +from .porncom import PornComIE +from .pornflip import PornFlipIE +from .pornhd import PornHdIE +from .pornhub import ( + PornHubIE, + PornHubUserIE, + PornHubPlaylistIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, +) +from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE +from .pornoxo import PornoXOIE +from .pornez import PornezIE +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, +) +from .premiershiprugby import 
PremiershipRugbyIE +from .presstv import PressTVIE +from .projectveritas import ProjectVeritasIE +from .prosiebensat1 import ProSiebenSat1IE +from .prx import ( + PRXStoryIE, + PRXSeriesIE, + PRXAccountIE, + PRXStoriesSearchIE, + PRXSeriesSearchIE +) +from .puls4 import Puls4IE +from .pyvideo import PyvideoIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, + QQMusicToplistIE, + QQMusicPlaylistIE, +) +from .r7 import ( + R7IE, + R7ArticleIE, +) +from .radiko import RadikoIE, RadikoRadioIE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) +from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE +from .radiobremen import RadioBremenIE +from .radiofrance import FranceCultureIE, RadioFranceIE +from .radiozet import RadioZetPodcastIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) +from .radlive import ( + RadLiveIE, + RadLiveChannelIE, + RadLiveSeasonIE, +) +from .rai import ( + RaiPlayIE, + RaiPlayLiveIE, + RaiPlayPlaylistIE, + RaiPlaySoundIE, + RaiPlaySoundLiveIE, + RaiPlaySoundPlaylistIE, + RaiIE, +) +from .raywenderlich import ( + RayWenderlichIE, + RayWenderlichCourseIE, +) +from .rbmaradio import RBMARadioIE +from .rcs import ( + RCSIE, + RCSEmbedsIE, + RCSVariousIE, +) +from .rcti import ( + RCTIPlusIE, + RCTIPlusSeriesIE, + RCTIPlusTVIE, +) +from .rds import RDSIE +from .redbulltv import ( + RedBullTVIE, + RedBullEmbedIE, + RedBullTVRrnContentIE, + RedBullIE, +) +from .reddit import RedditIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) +from .redtube import RedTubeIE +from .regiotv import RegioTVIE +from .rentv import ( + RENTVIE, + RENTVArticleIE, +) +from .restudy import RestudyIE +from .reuters import ReutersIE +from .reverbnation import ReverbNationIE +from .rice import RICEIE +from .rmcdecouverte import RMCDecouverteIE +from .rockstargames import RockstarGamesIE +from .rokfin import ( + RokfinIE, + RokfinStackIE, + RokfinChannelIE, + RokfinSearchIE, +) +from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE +from .rottentomatoes import RottenTomatoesIE +from .rozhlas import RozhlasIE +from .rtbf import RTBFIE +from .rte import RteIE, RteRadioIE +from .rtlnl import RtlNlIE +from .rtl2 import ( + RTL2IE, + RTL2YouIE, + RTL2YouSeriesIE, +) +from .rtnews import ( + RTNewsIE, + RTDocumentryIE, + RTDocumentryPlaylistIE, + RuptlyIE, +) +from .rtp import RTPIE +from .rtrfm import RTRFMIE +from .rts import RTSIE +from .rtve import ( + RTVEALaCartaIE, + RTVEAudioIE, + RTVELiveIE, + RTVEInfantilIE, + RTVETelevisionIE, +) +from .rtvnh import RTVNHIE +from .rtvs import RTVSIE +from .ruhd import RUHDIE +from .rule34video import Rule34VideoIE +from .rumble import ( + RumbleEmbedIE, + RumbleChannelIE, +) +from .rutube import ( + RutubeIE, + RutubeChannelIE, + RutubeEmbedIE, + RutubeMovieIE, + RutubePersonIE, + RutubePlaylistIE, + RutubeTagsIE, +) +from .glomex import ( + GlomexIE, + GlomexEmbedIE, +) +from .megatvcom import ( + MegaTVComIE, + MegaTVComEmbedIE, +) +from .ant1newsgr import ( + Ant1NewsGrWatchIE, + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, +) +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .ruv import ( + RuvIE, + RuvSpilaIE +) +from .safari import ( + SafariIE, + SafariApiIE, + SafariCourseIE, +) +from .saitosan import SaitosanIE +from .samplefocus import SampleFocusIE +from .sapo import SapoIE +from .savefrom import SaveFromIE +from .sbs import SBSIE +from .screencast import ScreencastIE +from .screencastomatic import ScreencastOMaticIE +from 
.scrippsnetworks import ( + ScrippsNetworksWatchIE, + ScrippsNetworksIE, +) +from .scte import ( + SCTEIE, + SCTECourseIE, +) +from .seeker import SeekerIE +from .senategov import SenateISVPIE, SenateGovIE +from .sendtonews import SendtoNewsIE +from .servus import ServusIE +from .sevenplus import SevenPlusIE +from .sexu import SexuIE +from .seznamzpravy import ( + SeznamZpravyIE, + SeznamZpravyArticleIE, +) +from .shahid import ( + ShahidIE, + ShahidShowIE, +) +from .shared import ( + SharedIE, + VivoIE, +) +from .shemaroome import ShemarooMeIE +from .showroomlive import ShowRoomLiveIE +from .simplecast import ( + SimplecastIE, + SimplecastEpisodeIE, + SimplecastPodcastIE, +) +from .sina import SinaIE +from .sixplay import SixPlayIE +from .skeb import SkebIE +from .skyit import ( + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + SkyItIE, + SkyItAcademyIE, + SkyItArteIE, + CieloTVItIE, + TV8ItIE, +) +from .skylinewebcams import SkylineWebcamsIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) +from .skynewsau import SkyNewsAUIE +from .sky import ( + SkyNewsIE, + SkyNewsStoryIE, + SkySportsIE, + SkySportsNewsIE, +) +from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE +from .slutload import SlutloadIE +from .snotr import SnotrIE +from .sohu import SohuIE +from .sonyliv import ( + SonyLIVIE, + SonyLIVSeriesIE, +) +from .soundcloud import ( + SoundcloudEmbedIE, + SoundcloudIE, + SoundcloudSetIE, + SoundcloudRelatedIE, + SoundcloudUserIE, + SoundcloudTrackStationIE, + SoundcloudPlaylistIE, + SoundcloudSearchIE, +) +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) +from .southpark import ( + SouthParkIE, + SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, + SouthParkLatIE, + SouthParkNlIE +) +from .sovietscloset import ( + SovietsClosetIE, + SovietsClosetPlaylistIE +) +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) +from .spankwire import SpankwireIE +from .spiegel import SpiegelIE +from .spike import ( + BellatorIE, + ParamountNetworkIE, +) +from .stitcher import ( + StitcherIE, + StitcherShowIE, +) +from .sport5 import Sport5IE +from .sportbox import SportBoxIE +from .sportdeutschland import SportDeutschlandIE +from .spotify import ( + SpotifyIE, + SpotifyShowIE, +) +from .spreaker import ( + SpreakerIE, + SpreakerPageIE, + SpreakerShowIE, + SpreakerShowPageIE, +) +from .springboardplatform import SpringboardPlatformIE +from .sprout import SproutIE +from .srgssr import ( + SRGSSRIE, + SRGSSRPlayIE, +) +from .srmediathek import SRMediathekIE +from .stanfordoc import StanfordOpenClassroomIE +from .startv import StarTVIE +from .steam import SteamIE +from .storyfire import ( + StoryFireIE, + StoryFireUserIE, + StoryFireSeriesIE, +) +from .streamable import StreamableIE +from .streamanity import StreamanityIE +from .streamcloud import StreamcloudIE +from .streamcz import StreamCZIE +from .streamff import StreamFFIE +from .streetvoice import StreetVoiceIE +from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE +from .stv import STVPlayerIE +from .substack import SubstackIE +from .sunporno import SunPornoIE +from .sverigesradio import ( + SverigesRadioEpisodeIE, + SverigesRadioPublicationIE, +) +from .svt import ( + SVTIE, + SVTPageIE, + SVTPlayIE, + SVTSeriesIE, +) +from .swrmediathek import SWRMediathekIE +from .syfy import SyfyIE +from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE +from .tass import TassIE +from .tbs import TBSIE +from .tdslifeway import TDSLifewayIE 
+from .teachable import ( + TeachableIE, + TeachableCourseIE, +) +from .teachertube import ( + TeacherTubeIE, + TeacherTubeUserIE, +) +from .teachingchannel import TeachingChannelIE +from .teamcoco import TeamcocoIE +from .teamtreehouse import TeamTreeHouseIE +from .techtalks import TechTalksIE +from .ted import ( + TedEmbedIE, + TedPlaylistIE, + TedSeriesIE, + TedTalkIE, +) +from .tele5 import Tele5IE +from .tele13 import Tele13IE +from .telebruxelles import TeleBruxellesIE +from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE +from .telegram import TelegramEmbedIE +from .telemb import TeleMBIE +from .telemundo import TelemundoIE +from .telequebec import ( + TeleQuebecIE, + TeleQuebecSquatIE, + TeleQuebecEmissionIE, + TeleQuebecLiveIE, + TeleQuebecVideoIE, +) +from .teletask import TeleTaskIE +from .telewebion import TelewebionIE +from .tennistv import TennisTVIE +from .tenplay import TenPlayIE +from .testurl import TestURLIE +from .tf1 import TF1IE +from .tfo import TFOIE +from .theintercept import TheInterceptIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) +from .thestar import TheStarIE +from .thesun import TheSunIE +from .theta import ( + ThetaVideoIE, + ThetaStreamIE, +) +from .theweatherchannel import TheWeatherChannelIE +from .thisamericanlife import ThisAmericanLifeIE +from .thisav import ThisAVIE +from .thisoldhouse import ThisOldHouseIE +from .threespeak import ( + ThreeSpeakIE, + ThreeSpeakUserIE, +) +from .threeqsdn import ThreeQSDNIE +from .tiktok import ( + TikTokIE, + TikTokUserIE, + TikTokSoundIE, + TikTokEffectIE, + TikTokTagIE, + TikTokVMIE, + DouyinIE, +) +from .tinypic import TinyPicIE +from .tmz import TMZIE +from .tnaflix import ( + TNAFlixNetworkEmbedIE, + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) +from .toggle import ( + ToggleIE, + MeWatchIE, +) +from .toggo import ( + ToggoIE, +) +from .tokentube import ( + TokentubeIE, + TokentubeChannelIE +) +from .tonline import TOnlineIE +from .toongoggles import ToonGogglesIE +from .toutv import TouTvIE +from .toypics import ToypicsUserIE, ToypicsIE +from .traileraddict import TrailerAddictIE +from .trilulilu import TriluliluIE +from .trovo import ( + TrovoIE, + TrovoVodIE, + TrovoChannelVodIE, + TrovoChannelClipIE, +) +from .trueid import TrueIDIE +from .trunews import TruNewsIE +from .trutv import TruTVIE +from .tube8 import Tube8IE +from .tubitv import ( + TubiTvIE, + TubiTvShowIE, +) +from .tumblr import TumblrIE +from .tunein import ( + TuneInClipIE, + TuneInStationIE, + TuneInProgramIE, + TuneInTopicIE, + TuneInShortenerIE, +) +from .tunepk import TunePkIE +from .turbo import TurboIE +from .tv2 import ( + TV2IE, + TV2ArticleIE, + KatsomoIE, + MTVUutisetArticleIE, +) +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) +from .tv2hu import ( + TV2HuIE, + TV2HuSeriesIE, +) +from .tv4 import TV4IE +from .tv5mondeplus import TV5MondePlusIE +from .tv5unis import ( + TV5UnisVideoIE, + TV5UnisIE, +) +from .tva import ( + TVAIE, + QubIE, +) +from .tvanouvelles import ( + TVANouvellesIE, + TVANouvellesArticleIE, +) +from .tvc import ( + TVCIE, + TVCArticleIE, +) +from .tver import TVerIE +from .tvigle import TvigleIE +from .tvland import TVLandIE +from .tvn24 import TVN24IE +from .tvnet import TVNetIE +from .tvnoe import TVNoeIE +from .tvnow import ( + TVNowIE, + TVNowFilmIE, + TVNowNewIE, + TVNowSeasonIE, + TVNowAnnualIE, + TVNowShowIE, +) +from .tvopengr import ( + TVOpenGrWatchIE, + TVOpenGrEmbedIE, +) +from .tvp import ( + TVPEmbedIE, + TVPIE, + TVPStreamIE, + TVPWebsiteIE, +) 
+from .tvplay import ( + TVPlayIE, + ViafreeIE, + TVPlayHomeIE, +) +from .tvplayer import TVPlayerIE +from .tweakers import TweakersIE +from .twentyfourvideo import TwentyFourVideoIE +from .twentymin import TwentyMinutenIE +from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import ( + TwitCastingIE, + TwitCastingLiveIE, + TwitCastingUserIE, +) +from .twitch import ( + TwitchVodIE, + TwitchCollectionIE, + TwitchVideosIE, + TwitchVideosClipsIE, + TwitchVideosCollectionsIE, + TwitchStreamIE, + TwitchClipsIE, +) +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, + TwitterBroadcastIE, + TwitterShortenerIE, +) +from .udemy import ( + UdemyIE, + UdemyCourseIE +) +from .udn import UDNEmbedIE +from .ufctv import ( + UFCTVIE, + UFCArabiaIE, +) +from .ukcolumn import UkColumnIE +from .uktvplay import UKTVPlayIE +from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) +from .drooble import DroobleIE +from .umg import UMGDeIE +from .unistra import UnistraIE +from .unity import UnityIE +from .uol import UOLIE +from .uplynk import ( + UplynkIE, + UplynkPreplayIE, +) +from .urort import UrortIE +from .urplay import URPlayIE +from .usanetwork import USANetworkIE +from .usatoday import USATodayIE +from .ustream import UstreamIE, UstreamChannelIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) +from .utreon import UtreonIE +from .varzesh3 import Varzesh3IE +from .vbox7 import Vbox7IE +from .veehd import VeeHDIE +from .veo import VeoIE +from .veoh import VeohIE +from .vesti import VestiIE +from .vevo import ( + VevoIE, + VevoPlaylistIE, +) +from .vgtv import ( + BTArticleIE, + BTVestlendingenIE, + VGTVIE, +) +from .vh1 import VH1IE +from .vice import ( + ViceIE, + ViceArticleIE, + ViceShowIE, +) +from .vidbit import VidbitIE +from .viddler import ViddlerIE +from .videa import VideaIE +from .videocampus_sachsen import VideocampusSachsenIE +from .videodetective import VideoDetectiveIE +from .videofyme import VideofyMeIE +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) +from .videopress import VideoPressIE +from .vidio import ( + VidioIE, + VidioPremierIE, + VidioLiveIE +) +from .vidlii import VidLiiIE +from .vier import VierIE, VierVideosIE +from .viewlift import ( + ViewLiftIE, + ViewLiftEmbedIE, +) +from .viidea import ViideaIE +from .vimeo import ( + VimeoIE, + VimeoAlbumIE, + VimeoChannelIE, + VimeoGroupsIE, + VimeoLikesIE, + VimeoOndemandIE, + VimeoReviewIE, + VimeoUserIE, + VimeoWatchLaterIE, + VHXEmbedIE, +) +from .vimm import ( + VimmIE, + VimmRecordingIE, +) +from .vimple import VimpleIE +from .vine import ( + VineIE, + VineUserIE, +) +from .viki import ( + VikiIE, + VikiChannelIE, +) +from .viqeo import ViqeoIE +from .viu import ( + ViuIE, + ViuPlaylistIE, + ViuOTTIE, +) +from .vk import ( + VKIE, + VKUserVideosIE, + VKWallPostIE, +) +from .vlive import ( + VLiveIE, + VLivePostIE, + VLiveChannelIE, +) +from .vodlocker import VodlockerIE +from .vodpl import VODPlIE +from .vodplatform import VODPlatformIE +from .voicerepublic import VoiceRepublicIE +from .voicy import ( + VoicyIE, + VoicyChannelIE, +) +from .voot import ( + VootIE, + VootSeriesIE, +) +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) +from .vrt import VRTIE +from .vrak import VrakIE +from .vrv import ( + VRVIE, + VRVSeriesIE, +) +from .vshare import VShareIE +from .vtm import VTMIE +from .medialaan import MedialaanIE +from .vuclip import VuClipIE +from .vupload import VuploadIE +from .vvvvid import ( + 
VVVVIDIE, + VVVVIDShowIE, +) +from .vyborymos import VyboryMosIE +from .vzaar import VzaarIE +from .wakanim import WakanimIE +from .walla import WallaIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) +from .wasdtv import ( + WASDTVStreamIE, + WASDTVRecordIE, + WASDTVClipIE, +) +from .wat import WatIE +from .watchbox import WatchBoxIE +from .watchindianporn import WatchIndianPornIE +from .wdr import ( + WDRIE, + WDRPageIE, + WDRElefantIE, + WDRMobileIE, +) +from .webcaster import ( + WebcasterIE, + WebcasterFeedIE, +) +from .webofstories import ( + WebOfStoriesIE, + WebOfStoriesPlaylistIE, +) +from .weibo import ( + WeiboIE, + WeiboMobileIE +) +from .weiqitv import WeiqiTVIE +from .willow import WillowIE +from .wimtv import WimTVIE +from .whowatch import WhoWatchIE +from .wistia import ( + WistiaIE, + WistiaPlaylistIE, +) +from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) +from .wsj import ( + WSJIE, + WSJArticleIE, +) +from .wwe import WWEIE +from .xbef import XBefIE +from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE +from .xhamster import ( + XHamsterIE, + XHamsterEmbedIE, + XHamsterUserIE, +) +from .xiami import ( + XiamiSongIE, + XiamiAlbumIE, + XiamiArtistIE, + XiamiCollectionIE +) +from .ximalaya import ( + XimalayaIE, + XimalayaAlbumIE +) +from .xinpianchang import XinpianchangIE +from .xminus import XMinusIE +from .xnxx import XNXXIE +from .xstream import XstreamIE +from .xtube import XTubeUserIE, XTubeIE +from .xuite import XuiteIE +from .xvideos import XVideosIE +from .xxxymovies import XXXYMoviesIE +from .yahoo import ( + YahooIE, + YahooSearchIE, + YahooGyaOPlayerIE, + YahooGyaOIE, + YahooJapanNewsIE, +) +from .yandexdisk import YandexDiskIE +from .yandexmusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, + YandexMusicArtistTracksIE, + YandexMusicArtistAlbumsIE, +) +from .yandexvideo import ( + YandexVideoIE, + YandexVideoPreviewIE, + ZenYandexIE, + ZenYandexChannelIE, +) +from .yapfiles import YapFilesIE +from .yesjapan import YesJapanIE +from .yinyuetai import YinYueTaiIE +from .ynet import YnetIE +from .youjizz import YouJizzIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) +from .younow import ( + YouNowLiveIE, + YouNowChannelIE, + YouNowMomentIE, +) +from .youporn import YouPornIE +from .yourporn import YourPornIE +from .yourupload import YourUploadIE +from .youtube import ( + YoutubeIE, + YoutubeClipIE, + YoutubeFavouritesIE, + YoutubeNotificationsIE, + YoutubeHistoryIE, + YoutubeTabIE, + YoutubeLivestreamEmbedIE, + YoutubePlaylistIE, + YoutubeRecommendedIE, + YoutubeSearchDateIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeMusicSearchURLIE, + YoutubeSubscriptionsIE, + YoutubeStoriesIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeYtBeIE, + YoutubeYtUserIE, + YoutubeWatchLaterIE, +) +from .zapiks import ZapiksIE +from .zattoo import ( + BBVTVIE, + EinsUndEinsTVIE, + EWETVIE, + GlattvisionTVIE, + MNetTVIE, + NetPlusIE, + OsnatelTVIE, + QuantumTVIE, + SaltTVIE, + SAKTVIE, + VTXTVIE, + WalyTVIE, + ZattooIE, + ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, +) +from .zdf import ZDFIE, ZDFChannelIE +from .zee5 import ( + Zee5IE, + Zee5SeriesIE, +) +from .zhihu import ZhihuIE +from .zingmp3 import ( + ZingMp3IE, + ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, +) +from .zoom import ZoomIE +from .zype import ZypeIE diff --git 
a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 1b9deeae8..a75efdd0f 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -7,16 +7,17 @@ import json
 import re
 import struct
 import time
+import urllib.parse
+import urllib.request
 import urllib.response
 import uuid
 
 from .common import InfoExtractor
 from ..aes import aes_ecb_decrypt
-from ..compat import compat_urllib_parse_urlparse, compat_urllib_request
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
-    decode_base,
+    decode_base_n,
     int_or_none,
     intlist_to_bytes,
     request_to_url,
@@ -33,7 +34,7 @@ def add_opener(ydl, handler):
     ''' Add a handler for opening URLs, like _download_webpage '''
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
     ydl._opener.add_handler(handler)
 
@@ -46,7 +47,7 @@ def remove_opener(ydl, handler):
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
-    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        find_cp = lambda x: isinstance(x, handler)
    else:
@@ -96,7 +97,7 @@ def remove_opener(ydl, handler):
     opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
 
 
-class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
+class AbemaLicenseHandler(urllib.request.BaseHandler):
     handler_order = 499
     STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
     HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
@@ -109,7 +110,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
         self.ie = ie
 
     def _get_videokey_from_ticket(self, ticket):
-        to_show = self.ie._downloader.params.get('verbose', False)
+        to_show = self.ie.get_param('verbose', False)
         media_token = self.ie._get_media_token(to_show=to_show)
 
         license_response = self.ie._download_json(
@@ -123,7 +124,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
                 'Content-Type': 'application/json',
             })
 
-        res = decode_base(license_response['k'], self.STRTABLE)
+        res = decode_base_n(license_response['k'], table=self.STRTABLE)
         encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
 
         h = hmac.new(
@@ -136,7 +137,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
 
     def abematv_license_open(self, url):
         url = request_to_url(url)
-        ticket = compat_urllib_parse_urlparse(url).netloc
+        ticket = urllib.parse.urlparse(url).netloc
         response_data = self._get_videokey_from_ticket(ticket)
         return urllib.response.addinfourl(io.BytesIO(response_data), headers={
             'Content-Length': len(response_data),
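Two themes from the abematv.py diff above repeat across this changeset: the compat_urllib_* aliases give way to the stdlib urllib.request / urllib.parse modules, and decode_base is renamed to decode_base_n, now taking its alphabet as a keyword argument (table=self.STRTABLE). For the latter, decoding a string as a big-endian integer over a custom digit alphabet can be sketched as below; this is a hypothetical stand-in that only mirrors the call shape used above, not yt-dlp's actual decode_base_n from utils.py:

def decode_base_n(string, table):
    # Interpret `string` as a base-len(table) integer, where the index of
    # each character in `table` is its digit value. In abematv.py the
    # 58-character STRTABLE decodes the license response into a 128-bit
    # key, which the hunk above then splits with struct.pack('>QQ', ...).
    digits = {char: value for value, char in enumerate(table)}
    result = 0
    for char in string:
        result = result * len(table) + digits[char]
    return result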
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index a8e6c4363..a2666c2b8 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1,3 +1,4 @@
+import getpass
 import json
 import re
 import time
@@ -5,19 +6,15 @@ import urllib.error
 import xml.etree.ElementTree as etree
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_getpass
-)
+from ..compat import compat_urlparse
 from ..utils import (
+    NO_DEFAULT,
+    ExtractorError,
     unescapeHTML,
-    urlencode_postdata,
     unified_timestamp,
-    ExtractorError,
-    NO_DEFAULT,
+    urlencode_postdata,
 )
 
-
 MSO_INFO = {
     'DTV': {
         'name': 'DIRECTV',
@@ -1431,7 +1428,7 @@ class AdobePassIE(InfoExtractor):
         guid = xml_text(resource, 'guid') if '<' in resource else resource
         count = 0
         while count < 2:
-            requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+            requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
             authn_token = requestor_info.get('authn_token')
             if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
                 authn_token = None
@@ -1506,7 +1503,7 @@ class AdobePassIE(InfoExtractor):
                         'send_confirm_link': False,
                         'send_token': True
                     }))
-                    philo_code = compat_getpass('Type auth code you have received [Return]: ')
+                    philo_code = getpass.getpass('Type auth code you have received [Return]: ')
                     self._download_webpage(
                         'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
                             'token': philo_code
@@ -1726,12 +1723,12 @@ class AdobePassIE(InfoExtractor):
                         raise_mvpd_required()
                     raise
                 if '<pendingLogout' in session:
-                    self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                    self.cache.store(self._MVPD_CACHE, requestor_id, {})
                     count += 1
                     continue
                 authn_token = unescapeHTML(xml_text(session, 'authnToken'))
                 requestor_info['authn_token'] = authn_token
-                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+                self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
 
             authz_token = requestor_info.get(guid)
             if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@@ -1747,14 +1744,14 @@ class AdobePassIE(InfoExtractor):
                     'userMeta': '1',
                 }), headers=mvpd_headers)
             if '<pendingLogout' in authorize:
-                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                self.cache.store(self._MVPD_CACHE, requestor_id, {})
                 count += 1
                 continue
             if '<error' in authorize:
                 raise ExtractorError(xml_text(authorize, 'details'), expected=True)
             authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
             requestor_info[guid] = authz_token
-            self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+            self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
 
         mvpd_headers.update({
             'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@@ -1770,7 +1767,7 @@ class AdobePassIE(InfoExtractor):
                 'hashed_guid': 'false',
             }), headers=mvpd_headers)
         if '<pendingLogout' in short_authorize:
-            self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+            self.cache.store(self._MVPD_CACHE, requestor_id, {})
             count += 1
             continue
         return short_authorize
diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py
deleted file mode 100644
index fe2b70aed..000000000
--- a/yt_dlp/extractor/animelab.py
+++ /dev/null
@@ -1,270 +0,0 @@
-from .common import InfoExtractor
-
-from ..utils import (
-    ExtractorError,
-    urlencode_postdata,
-    int_or_none,
-    str_or_none,
-    determine_ext,
-)
-
-from ..compat import compat_HTTPError
-
-
-class AnimeLabBaseIE(InfoExtractor):
-    _LOGIN_URL = 'https://www.animelab.com/login'
-    _NETRC_MACHINE = 'animelab'
-    _LOGGED_IN = False
-
-    def _is_logged_in(self, login_page=None):
-        if not self._LOGGED_IN:
-            if not login_page:
-                login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
-            AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
-        return self._LOGGED_IN
-
-    def _perform_login(self, username, password):
-        if self._is_logged_in():
-            return
-
-        login_form = {
-            'email': username,
-            'password': password,
-        }
-
-        try:
-            response = self._download_webpage(
-                self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
-                data=urlencode_postdata(login_form),
-                headers={'Content-Type': 'application/x-www-form-urlencoded'})
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-                raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
-            raise
-
-        if not self._is_logged_in(response):
-            raise ExtractorError('Unable to login (cannot verify if logged in)')
-
-    def _real_initialize(self):
-        if not self._is_logged_in():
-            self.raise_login_required('Login is required to access any AnimeLab content')
-
-
-class AnimeLabIE(AnimeLabBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
-
-    _TEST = {
-        'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
-        'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
-        'info_dict': {
-            'id': '383',
-            'ext': 'mp4',
-            'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
-            'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
-            'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
-            'series': 'Fullmetal Alchemist: Brotherhood',
-            'episode': 'Signs of a Counteroffensive',
-            'episode_number': 42,
-            'duration': 1469,
-            'season': 'Season 1',
-            'season_number': 1,
-            'season_id': '38',
-        },
-        'params': {
-            # Ensure the same video is downloaded whether the user is premium or not
-            'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        # unfortunately we can get different URLs for the same formats
-        # e.g. if we are using a "free" account so no dubs available
-        # (so _remove_duplicate_formats is not effective)
-        # so we use a dictionary as a workaround
-        formats = {}
-        for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
-                                    'https://www.animelab.com/player/%s/dubbed'):
-            actual_url = language_option_url % display_id
-            webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
-
-            video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
-            position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
-
-            raw_data = video_collection[position]['videoEntry']
-
-            video_id = str_or_none(raw_data['id'])
-
-            # create a title from many sources (while grabbing other info)
-            # TODO use more fallback sources to get some of these
-            series = raw_data.get('showTitle')
-            video_type = raw_data.get('videoEntryType', {}).get('name')
-            episode_number = raw_data.get('episodeNumber')
-            episode_name = raw_data.get('name')
-
-            title_parts = (series, video_type, episode_number, episode_name)
-            if None not in title_parts:
-                title = '%s - %s %s - %s' % title_parts
-            else:
-                title = episode_name
-
-            description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
-
-            duration = int_or_none(raw_data.get('duration'))
-
-            thumbnail_data = raw_data.get('images', [])
-            thumbnails = []
-            for thumbnail in thumbnail_data:
-                for instance in thumbnail['imageInstances']:
-                    image_data = instance.get('imageInfo', {})
-                    thumbnails.append({
-                        'id': str_or_none(image_data.get('id')),
-                        'url': image_data.get('fullPath'),
-                        'width': image_data.get('width'),
-                        'height': image_data.get('height'),
-                    })
-
-            season_data = raw_data.get('season', {}) or {}
-            season = str_or_none(season_data.get('name'))
-            season_number = int_or_none(season_data.get('seasonNumber'))
-            season_id = str_or_none(season_data.get('id'))
-
-            for video_data in raw_data['videoList']:
-                current_video_list = {}
-                current_video_list['language'] = video_data.get('language', {}).get('languageCode')
-
-                is_hardsubbed = video_data.get('hardSubbed')
-
-                for video_instance in video_data['videoInstances']:
-                    httpurl = video_instance.get('httpUrl')
-                    url = httpurl if httpurl else video_instance.get('rtmpUrl')
-                    if url is None:
-                        # this video format is unavailable to the user (not premium etc.)
-                        continue
-
-                    current_format = current_video_list.copy()
-
-                    format_id_parts = []
-
-                    format_id_parts.append(str_or_none(video_instance.get('id')))
-
-                    if is_hardsubbed is not None:
-                        if is_hardsubbed:
-                            format_id_parts.append('yeshardsubbed')
-                        else:
-                            format_id_parts.append('nothardsubbed')
-
-                    format_id_parts.append(current_format['language'])
-
-                    format_id = '_'.join([x for x in format_id_parts if x is not None])
-
-                    ext = determine_ext(url)
-                    if ext == 'm3u8':
-                        for format_ in self._extract_m3u8_formats(
-                                url, video_id, m3u8_id=format_id, fatal=False):
-                            formats[format_['format_id']] = format_
-                        continue
-                    elif ext == 'mpd':
-                        for format_ in self._extract_mpd_formats(
-                                url, video_id, mpd_id=format_id, fatal=False):
-                            formats[format_['format_id']] = format_
-                        continue
-
-                    current_format['url'] = url
-                    quality_data = video_instance.get('videoQuality')
-                    if quality_data:
-                        quality = quality_data.get('name') or quality_data.get('description')
-                    else:
-                        quality = None
-
-                    height = None
-                    if quality:
-                        height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-
-                    if height is None:
-                        self.report_warning('Could not get height of video')
-                    else:
-                        current_format['height'] = height
-                    current_format['format_id'] = format_id
-
-                    formats[current_format['format_id']] = current_format
-
-        formats = list(formats.values())
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'series': series,
-            'episode': episode_name,
-            'episode_number': int_or_none(episode_number),
-            'thumbnails': thumbnails,
-            'duration': duration,
-            'formats': formats,
-            'season': season,
-            'season_number': season_number,
-            'season_id': season_id,
-        }
-
-
-class AnimeLabShowsIE(AnimeLabBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
-
-    _TEST = {
-        'url': 'https://www.animelab.com/shows/attack-on-titan',
-        'info_dict': {
-            'id': '45',
-            'title': 'Attack on Titan',
-            'description': 'md5:989d95a2677e9309368d5cf39ba91469',
-        },
-        'playlist_count': 59,
-        'skip': 'All AnimeLab content requires authentication',
-    }
-
-    def _real_extract(self, url):
-        _BASE_URL = 'http://www.animelab.com'
-        _SHOWS_API_URL = '/api/videoentries/show/videos/'
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
-
-        show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
-        show_data = self._parse_json(show_data_str, display_id)
-
-        show_id = str_or_none(show_data.get('id'))
-        title = show_data.get('name')
-        description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
-
-        entries = []
-        for season in show_data['seasons']:
-            season_id = season['id']
-            get_data = urlencode_postdata({
-                'seasonId': season_id,
-                'limit': 1000,
-            })
-            # despite using urlencode_postdata, we are sending a GET request
-            target_url = _BASE_URL + _SHOWS_API_URL + show_id
+ "?" + get_data.decode('utf-8') - response = self._download_webpage( - target_url, - None, 'Season id %s' % season_id) - - season_data = self._parse_json(response, display_id) - - for video_data in season_data['list']: - entries.append(self.url_result( - _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab', - str_or_none(video_data.get('id')), video_data.get('name') - )) - - return { - '_type': 'playlist', - 'id': show_id, - 'title': title, - 'description': description, - 'entries': entries, - } - -# TODO implement myqueue diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index c85d5297d..1ca6ddc4d 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,36 +1,34 @@ -import re import json +import re +import urllib.parse + from .common import InfoExtractor -from .youtube import YoutubeIE, YoutubeBaseInfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, - compat_HTTPError -) +from .youtube import YoutubeBaseInfoExtractor, YoutubeIE +from ..compat import compat_HTTPError, compat_urllib_parse_unquote from ..utils import ( + KNOWN_EXTENSIONS, + ExtractorError, + HEADRequest, bug_reports_message, clean_html, dict_get, extract_attributes, - ExtractorError, get_element_by_id, - HEADRequest, int_or_none, join_nonempty, - KNOWN_EXTENSIONS, merge_dicts, mimetype2ext, orderedSet, parse_duration, parse_qs, - str_to_int, str_or_none, + str_to_int, traverse_obj, try_get, unified_strdate, unified_timestamp, + url_or_none, urlhandle_detect_ext, - url_or_none ) @@ -143,7 +141,7 @@ class ArchiveOrgIE(InfoExtractor): return json.loads(extract_attributes(element)['value']) def _real_extract(self, url): - video_id = compat_urllib_parse_unquote_plus(self._match_id(url)) + video_id = urllib.parse.unquote_plus(self._match_id(url)) identifier, entry_id = (video_id.split('/', 1) + [None])[:2] # Archive.org metadata API doesn't clearly demarcate playlist entries @@ -442,9 +440,10 @@ class YoutubeWebArchiveIE(InfoExtractor): 'only_matching': True }, ] - _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE - _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE - _YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE + _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE + _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x) + (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*| + {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}''' _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( @@ -474,11 +473,6 @@ class YoutubeWebArchiveIE(InfoExtractor): elif not isinstance(res, list) or len(res) != 0: self.report_warning('Error while parsing CDX API response' + bug_reports_message()) - def _extract_yt_initial_variable(self, webpage, regex, video_id, name): - return self._parse_json(self._search_regex( - (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', - regex), webpage, name, default='{}'), video_id, fatal=False) - def _extract_webpage_title(self, webpage): page_title = self._html_extract_title(webpage, default='') # YouTube video pages appear to always have either 'YouTube 
-' as prefix or '- YouTube' as suffix. @@ -488,10 +482,11 @@ class YoutubeWebArchiveIE(InfoExtractor): def _extract_metadata(self, video_id, webpage): search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None)) - player_response = self._extract_yt_initial_variable( - webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {} - initial_data = self._extract_yt_initial_variable( - webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial player response') or {} + player_response = self._search_json( + self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', + video_id, default={}) + initial_data = self._search_json( + self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={}) initial_data_video = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'), diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 96b134fa0..c80ce2233 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -90,7 +90,7 @@ class ArnesIE(InfoExtractor): 'timestamp': parse_iso8601(video.get('creationTime')), 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'), + 'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'), 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py new file mode 100644 index 000000000..3f7b1e9f8 --- /dev/null +++ b/yt_dlp/extractor/atscaleconf.py @@ -0,0 +1,34 @@ +import re + +from .common import InfoExtractor + + +class AtScaleConfEventIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)' + + _TESTS = [{ + 'url': 'https://atscaleconference.com/events/data-scale-spring-2022/', + 'playlist_mincount': 13, + 'info_dict': { + 'id': 'data-scale-spring-2022', + 'title': 'Data @Scale Spring 2022', + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + }, + }, { + 'url': 'https://atscaleconference.com/events/video-scale-2021/', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'video-scale-2021', + 'title': 'Video @Scale 2021', + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + }, + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + + return self.playlist_from_matches( + re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), + ie='Generic', playlist_id=id, + title=self._og_search_title(webpage), description=self._og_search_description(webpage)) diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index 189d1224f..0105d9db8 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,8 +1,8 @@ import random from .common import InfoExtractor -from ..utils import ExtractorError, try_get, compat_str, str_or_none -from ..compat import compat_urllib_parse_unquote +from ..compat import compat_str, compat_urllib_parse_unquote +from ..utils import ExtractorError, str_or_none, try_get class AudiusBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index d289f6be3..6fc938de9 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -41,7 +41,7 @@ class AWAANBaseIE(InfoExtractor): 
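The web-archive hunk above drops the hand-rolled regex-plus-`_parse_json` helper in favour of the new `_search_json` method (defined in the `common.py` hunk further down). A usage sketch under a hypothetical extractor; the `window.__INITIAL_STATE__` pattern mirrors the bilibili change later in this diff:

```python
from yt_dlp.extractor.common import InfoExtractor


class DemoJSONIE(InfoExtractor):  # hypothetical extractor for illustration
    _VALID_URL = r'https?://demo\.example/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Old pattern: self._parse_json(self._search_regex(r'...=\s*({.+?});', ...))
        # New pattern: _search_json locates the object following start_pattern
        # and parses it leniently; passing default={} makes the lookup non-fatal.
        data = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage,
            'initial state', video_id, default={})
        return {'id': video_id, 'title': data.get('title') or video_id}
```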
'id': video_id, 'title': title, 'description': video_data.get('description_en') or video_data.get('description_ar'), - 'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'), + 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), 'is_live': is_live, diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 9cb019a49..5ddeef7b5 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,16 +1,12 @@ -import xml.etree.ElementTree import functools import itertools import json import re +import urllib.error +import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_error, - compat_urlparse, -) +from ..compat import compat_HTTPError, compat_str, compat_urlparse from ..utils import ( ExtractorError, OnDemandPagedList, @@ -391,7 +387,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) except ExtractorError as e: - if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError) + if not (isinstance(e.exc_info[1], urllib.error.HTTPError) and e.exc_info[1].code in (403, 404)): raise fmts = [] diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 8f9849d9b..5ae4b917a 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor): )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' _TESTS = [{ 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', - 'md5': '36d3ef559cfe8af8efe15922cd3ce950', + 'md5': '3e5b8e38370741d5089da79161646635', 'info_dict': { 'id': '1403070', 'ext': 'flv', @@ -32,6 +32,14 @@ class BellMediaIE(InfoExtractor): 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', 'upload_date': '20180525', 'timestamp': 1527288600, + 'season_id': 73997, + 'season': '2018', + 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', + 'tags': [], + 'categories': ['ETFs'], + 'season_number': 8, + 'duration': 272.038, + 'series': 'Market Call Tonight', }, }, { 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index ead0dd88b..d695d9b49 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -677,6 +677,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'vcodec': 'none' }] + for a_format in formats: + a_format.setdefault('http_headers', {}).update({ + 'Referer': url, + }) + song = self._call_api('song/info', au_id) title = song['title'] statistic = song.get('statistic') or {} @@ -784,7 +789,8 @@ class BiliIntlBaseIE(InfoExtractor): def json2srt(self, json): data = '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' - for i, line in enumerate(json['body']) if line.get('content')) + for i, line in enumerate(traverse_obj(json, ( + 'body', lambda _, l: l['content'] and l['from'] and l['to'])))) return data def _get_subtitles(self, *, ep_id=None, aid=None): @@ -947,12 +953,11 @@ class BiliIntlIE(BiliIntlBaseIE): video_id = ep_id or aid webpage = self._download_webpage(url, video_id) # Bstation layout - initial_data = self._parse_json(self._search_regex( - 
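The `json2srt` change above moves the cue filtering into a `traverse_obj` predicate path instead of a trailing `if`. A standalone sketch of that mechanism with made-up cue data:

```python
from yt_dlp.utils import traverse_obj

subs = {'body': [
    {'from': 0.5, 'to': 1.5, 'content': 'hello'},
    {'from': 2.0, 'to': 3.0, 'content': ''},    # filtered out: empty content
    {'from': 0.0, 'to': 0.0, 'content': 'x'},   # filtered out: falsy timestamps
]}

# A callable inside a traverse_obj path acts as a filter: it is called with
# (key, value) for each list item and keeps the items it returns truthy for.
# Note the truthiness test also drops a cue starting at exactly 0.0.
cues = traverse_obj(subs, ('body', lambda _, cue: cue['content'] and cue['from'] and cue['to']))
print(cues)  # -> [{'from': 0.5, 'to': 1.5, 'content': 'hello'}]
```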
r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage, - 'preload state', default='{}'), video_id, fatal=False) or {} - video_data = ( - traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict) - or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {}) + initial_data = ( + self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) + or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) + video_data = traverse_obj( + initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) if season_id and not video_data: # Non-Bstation layout, read through episode list @@ -960,7 +965,7 @@ class BiliIntlIE(BiliIntlBaseIE): video_data = traverse_obj(season_json, ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), expected_type=dict, get_all=False) - return self._extract_video_info(video_data, ep_id=ep_id, aid=aid) + return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid) class BiliIntlSeriesIE(BiliIntlBaseIE): diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index c0aaeae02..c842c342c 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -7,13 +7,11 @@ class BloombergIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' _TESTS = [{ - 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', - # The md5 checksum changes + 'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video', 'info_dict': { - 'id': 'qurhIVlJSB6hzkVi229d8g', + 'id': 'V8cFcYMxTHaMcEiiYVr39A', 'ext': 'flv', - 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:a8ba0302912d03d246979735c17d2761', + 'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much', }, 'params': { 'format': 'best[format_id^=hds]', @@ -57,7 +55,7 @@ class BloombergIE(InfoExtractor): title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) + 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url') diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 936c34e15..a5412897d 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -600,9 +600,9 @@ class BrightcoveNewIE(AdobePassIE): account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups() policy_key_id = '%s_%s' % (account_id, player_id) - policy_key = self._downloader.cache.load('brightcove', policy_key_id) + policy_key = self.cache.load('brightcove', policy_key_id) policy_key_extracted = False - store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) + store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x) def extract_policy_key(): base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index cac3f1e9d..999b7bc53 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -304,13 +304,13 @@ class CBCGemIE(InfoExtractor): def _get_claims_token(self, email, password): if not self.claims_token_valid(): self._claims_token = 
self._new_claims_token(email, password) - self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token) + self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token) return self._claims_token def _real_initialize(self): if self.claims_token_valid(): return - self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token') + self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') def _find_secret_formats(self, formats, video_id): """ Find a valid video url and convert it to the secret variant """ diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index b11e1f74e..1bc0f07f2 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -75,6 +75,7 @@ class CCCIE(InfoExtractor): 'thumbnail': event_data.get('thumb_url'), 'timestamp': parse_iso8601(event_data.get('date')), 'duration': int_or_none(event_data.get('length')), + 'view_count': int_or_none(event_data.get('view_count')), 'tags': event_data.get('tags'), 'formats': formats, } diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 9b257bee9..6d01c60d5 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -1,13 +1,9 @@ import codecs -import re import json +import re from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_ord, - compat_urllib_parse_unquote, -) +from ..compat import compat_ord, compat_urllib_parse_unquote from ..utils import ( ExtractorError, float_or_none, @@ -16,8 +12,8 @@ from ..utils import ( multipart_encode, parse_duration, random_birthday, - urljoin, try_get, + urljoin, ) @@ -144,7 +140,7 @@ class CDAIE(InfoExtractor): b = [] for c in a: f = compat_ord(c) - b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f)) + b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f)) a = ''.join(b) a = a.replace('.cda.mp4', '') for p in ('.2cda.pl', '.3cda.pl'): diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index 7e8c0bfc9..e54d92a86 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -1,11 +1,11 @@ import itertools import json +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote_plus from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, str_to_int, url_or_none, @@ -47,8 +47,8 @@ class ChingariBaseIE(InfoExtractor): 'id': id, 'extractor_key': ChingariIE.ie_key(), 'extractor': 'Chingari', - 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), - 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), + 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))), + 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))), 'duration': media_data.get('duration'), 'thumbnail': url_or_none(thumbnail), 'like_count': post_data.get('likeCount'), diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebeca4395..4fbcfe203 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,6 +1,10 @@ import base64 import collections +import getpass import hashlib +import http.client +import http.cookiejar +import http.cookies import itertools import json import math @@ -9,24 +13,12 @@ import os import random import sys import time +import urllib.parse +import urllib.request import xml.etree.ElementTree -from ..compat import ( - compat_cookiejar_Cookie, - compat_cookies_SimpleCookie, - compat_etree_fromstring, - 
compat_expanduser, - compat_getpass, - compat_http_client, - compat_os_name, - compat_str, - compat_urllib_error, - compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, - compat_urllib_request, - compat_urlparse, - re, -) +from ..compat import functools, re # isort: split +from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name from ..downloader import FileDownloader from ..downloader.f4m import get_base_url, remove_encrypted_media from ..utils import ( @@ -35,6 +27,7 @@ from ..utils import ( ExtractorError, GeoRestrictedError, GeoUtils, + LenientJSONDecoder, RegexNotFoundError, UnsupportedError, age_restricted, @@ -384,6 +377,11 @@ class InfoExtractor: release_year: Year (YYYY) when the album was released. composer: Composer of the piece + The following fields should only be set for clips that should be cut from the original video: + + section_start: Start time of the section in seconds + section_end: End time of the section in seconds + Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, None is equivalent to absence of information. @@ -610,8 +608,7 @@ class InfoExtractor: if ip_block: self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block) - self._downloader.write_debug( - '[debug] Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip) + self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For') return # Path 2: bypassing based on country code @@ -666,7 +663,7 @@ class InfoExtractor: if hasattr(e, 'countries'): kwargs['countries'] = e.countries raise type(e)(e.orig_msg, **kwargs) - except compat_http_client.IncompleteRead as e: + except http.client.IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url)) @@ -690,6 +687,14 @@ class InfoExtractor: """Sets a YoutubeDL instance as the downloader for this IE.""" self._downloader = downloader + @property + def cache(self): + return self._downloader.cache + + @property + def cookiejar(self): + return self._downloader.cookiejar + def _initialize_pre_login(self): """ Intialization before login. Redefine in subclasses.""" pass @@ -717,7 +722,7 @@ class InfoExtractor: @staticmethod def __can_accept_status_code(err, expected_status): - assert isinstance(err, compat_urllib_error.HTTPError) + assert isinstance(err, urllib.error.HTTPError) if expected_status is None: return False elif callable(expected_status): @@ -725,7 +730,14 @@ class InfoExtractor: else: return err.code in variadic(expected_status) - def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None): + def _create_request(self, url_or_request, data=None, headers=None, query=None): + if isinstance(url_or_request, urllib.request.Request): + return update_Request(url_or_request, data=data, headers=headers, query=query) + if query: + url_or_request = update_url_query(url_or_request, query) + return sanitized_Request(url_or_request, data, headers or {}) + + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None): """ Return the response handle. @@ -753,21 +765,13 @@ class InfoExtractor: # geo unrestricted country. We will do so once we encounter any # geo restriction error. 
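`__can_accept_status_code` (context above) dispatches on the type of `expected_status`: an int, a container of ints, or a callable, with `variadic` normalizing a bare int to a tuple. The same logic as a standalone sketch:

```python
from yt_dlp.utils import variadic


def can_accept_status_code(code, expected_status):
    # Mirrors InfoExtractor.__can_accept_status_code above
    if expected_status is None:
        return False
    if callable(expected_status):
        return expected_status(code) is True
    return code in variadic(expected_status)


print(can_accept_status_code(404, 404))                  # True
print(can_accept_status_code(404, (403, 404)))           # True
print(can_accept_status_code(404, lambda c: c >= 400))   # True
print(can_accept_status_code(404, None))                 # False
```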
if self._x_forwarded_for_ip: - if 'X-Forwarded-For' not in headers: - headers['X-Forwarded-For'] = self._x_forwarded_for_ip + headers = (headers or {}).copy() + headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip) - if isinstance(url_or_request, compat_urllib_request.Request): - url_or_request = update_Request( - url_or_request, data=data, headers=headers, query=query) - else: - if query: - url_or_request = update_url_query(url_or_request, query) - if data is not None or headers: - url_or_request = sanitized_Request(url_or_request, data, headers) try: - return self._downloader.urlopen(url_or_request) + return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query)) except network_exceptions as err: - if isinstance(err, compat_urllib_error.HTTPError): + if isinstance(err, urllib.error.HTTPError): if self.__can_accept_status_code(err, expected_status): # Retain reference to error to prevent file object from # being closed before it can be read. Works around the @@ -788,14 +792,42 @@ class InfoExtractor: self.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, + encoding=None, data=None, headers={}, query={}, expected_status=None): """ Return a tuple (page content as string, URL handle). - See _download_webpage docstring for arguments specification. + Arguments: + url_or_request -- plain text URL as a string or + a urllib.request.Request object + video_id -- Video/playlist/item identifier (string) + + Keyword arguments: + note -- note printed before downloading (string) + errnote -- note printed in case of an error (string) + fatal -- flag denoting whether error should be considered fatal, + i.e. whether it should cause ExtractionError to be raised, + otherwise a warning will be reported and extraction continued + encoding -- encoding for a page content decoding, guessed automatically + when not explicitly specified + data -- POST data (bytes) + headers -- HTTP headers (dict) + query -- URL query (dict) + expected_status -- allows to accept failed HTTP requests (non 2xx + status code) by explicitly specifying a set of accepted status + codes. Can be any of the following entities: + - an integer type specifying an exact failed status code to + accept + - a list or a tuple of integer types specifying a list of + failed status codes to accept + - a callable accepting an actual failed status code and + returning True if it should be accepted + Note that this argument does not affect success status codes (2xx) + which are always accepted. 
""" + # Strip hashes from the URL (#1038) - if isinstance(url_or_request, (compat_str, str)): + if isinstance(url_or_request, str): url_or_request = url_or_request.partition('#')[0] urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status) @@ -850,140 +882,48 @@ class InfoExtractor: 'Visit http://blocklist.rkn.gov.ru/ for a block reason.', expected=True) + def _request_dump_filename(self, url, video_id): + basen = f'{video_id}_{url}' + trim_length = self.get_param('trim_file_name') or 240 + if len(basen) > trim_length: + h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() + basen = basen[:trim_length - len(h)] + h + filename = sanitize_filename(f'{basen}.dump', restricted=True) + # Working around MAX_PATH limitation on Windows (see + # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) + if compat_os_name == 'nt': + absfilepath = os.path.abspath(filename) + if len(absfilepath) > 259: + filename = fR'\\?\{absfilepath}' + return filename + + def __decode_webpage(self, webpage_bytes, encoding, headers): + if not encoding: + encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes) + try: + return webpage_bytes.decode(encoding, 'replace') + except LookupError: + return webpage_bytes.decode('utf-8', 'replace') + def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): - content_type = urlh.headers.get('Content-Type', '') webpage_bytes = urlh.read() if prefix is not None: webpage_bytes = prefix + webpage_bytes - if not encoding: - encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self.get_param('dump_intermediate_pages', False): self.to_screen('Dumping request to ' + urlh.geturl()) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) - if self.get_param('write_pages', False): - basen = f'{video_id}_{urlh.geturl()}' - trim_length = self.get_param('trim_file_name') or 240 - if len(basen) > trim_length: - h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() - basen = basen[:trim_length - len(h)] + h - raw_filename = basen + '.dump' - filename = sanitize_filename(raw_filename, restricted=True) - self.to_screen('Saving request to ' + filename) - # Working around MAX_PATH limitation on Windows (see - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) - if compat_os_name == 'nt': - absfilepath = os.path.abspath(filename) - if len(absfilepath) > 259: - filename = '\\\\?\\' + absfilepath + if self.get_param('write_pages'): + filename = self._request_dump_filename(urlh.geturl(), video_id) + self.to_screen(f'Saving request to {filename}') with open(filename, 'wb') as outf: outf.write(webpage_bytes) - try: - content = webpage_bytes.decode(encoding, 'replace') - except LookupError: - content = webpage_bytes.decode('utf-8', 'replace') - + content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers) self.__check_blocked(content) return content - def _download_webpage( - self, url_or_request, video_id, note=None, errnote=None, - fatal=True, tries=1, timeout=5, encoding=None, data=None, - headers={}, query={}, expected_status=None): - """ - Return the data of the page as a string. 
- - Arguments: - url_or_request -- plain text URL as a string or - a compat_urllib_request.Requestobject - video_id -- Video/playlist/item identifier (string) - - Keyword arguments: - note -- note printed before downloading (string) - errnote -- note printed in case of an error (string) - fatal -- flag denoting whether error should be considered fatal, - i.e. whether it should cause ExtractionError to be raised, - otherwise a warning will be reported and extraction continued - tries -- number of tries - timeout -- sleep interval between tries - encoding -- encoding for a page content decoding, guessed automatically - when not explicitly specified - data -- POST data (bytes) - headers -- HTTP headers (dict) - query -- URL query (dict) - expected_status -- allows to accept failed HTTP requests (non 2xx - status code) by explicitly specifying a set of accepted status - codes. Can be any of the following entities: - - an integer type specifying an exact failed status code to - accept - - a list or a tuple of integer types specifying a list of - failed status codes to accept - - a callable accepting an actual failed status code and - returning True if it should be accepted - Note that this argument does not affect success status codes (2xx) - which are always accepted. - """ - - success = False - try_count = 0 - while success is False: - try: - res = self._download_webpage_handle( - url_or_request, video_id, note, errnote, fatal, - encoding=encoding, data=data, headers=headers, query=query, - expected_status=expected_status) - success = True - except compat_http_client.IncompleteRead as e: - try_count += 1 - if try_count >= tries: - raise e - self._sleep(timeout, video_id) - if res is False: - return res - else: - content, _ = res - return content - - def _download_xml_handle( - self, url_or_request, video_id, note='Downloading XML', - errnote='Unable to download XML', transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}, - expected_status=None): - """ - Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). - - See _download_webpage docstring for arguments specification. - """ - res = self._download_webpage_handle( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query, - expected_status=expected_status) - if res is False: - return res - xml_string, urlh = res - return self._parse_xml( - xml_string, video_id, transform_source=transform_source, - fatal=fatal), urlh - - def _download_xml( - self, url_or_request, video_id, - note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True, encoding=None, - data=None, headers={}, query={}, expected_status=None): - """ - Return the xml as an xml.etree.ElementTree.Element. - - See _download_webpage docstring for arguments specification. 
- """ - res = self._download_xml_handle( - url_or_request, video_id, note=note, errnote=errnote, - transform_source=transform_source, fatal=fatal, encoding=encoding, - data=data, headers=headers, query=query, - expected_status=expected_status) - return res if res is False else res[0] - def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): if transform_source: xml_string = transform_source(xml_string) @@ -996,101 +936,126 @@ class InfoExtractor: else: self.report_warning(errmsg + str(ve)) - def _download_json_handle( - self, url_or_request, video_id, note='Downloading JSON metadata', - errnote='Unable to download JSON metadata', transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}, - expected_status=None): - """ - Return a tuple (JSON object, URL handle). - - See _download_webpage docstring for arguments specification. - """ - res = self._download_webpage_handle( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query, - expected_status=expected_status) - if res is False: - return res - json_string, urlh = res - return self._parse_json( - json_string, video_id, transform_source=transform_source, - fatal=fatal), urlh - - def _download_json( - self, url_or_request, video_id, note='Downloading JSON metadata', - errnote='Unable to download JSON metadata', transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}, - expected_status=None): - """ - Return the JSON object as a dict. - - See _download_webpage docstring for arguments specification. - """ - res = self._download_json_handle( - url_or_request, video_id, note=note, errnote=errnote, - transform_source=transform_source, fatal=fatal, encoding=encoding, - data=data, headers=headers, query=query, - expected_status=expected_status) - return res if res is False else res[0] - - def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): - if transform_source: - json_string = transform_source(json_string) + def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs): try: - return json.loads(json_string, strict=False) + return json.loads( + json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs) except ValueError as ve: - errmsg = '%s: Failed to parse JSON ' % video_id + errmsg = f'{video_id}: Failed to parse JSON' if fatal: raise ExtractorError(errmsg, cause=ve) else: - self.report_warning(errmsg + str(ve)) + self.report_warning(f'{errmsg}: {ve}') def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True): return self._parse_json( data[data.find('{'):data.rfind('}') + 1], video_id, transform_source, fatal) - def _download_socket_json_handle( - self, url_or_request, video_id, note='Polling socket', - errnote='Unable to poll socket', transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}, - expected_status=None): - """ - Return a tuple (JSON object, URL handle). 
+ def __create_download_methods(name, parser, note, errnote, return_value): + + def parse(ie, content, *args, **kwargs): + if parser is None: + return content + # parser is fetched by name so subclasses can override it + return getattr(ie, parser)(content, *args, **kwargs) + + def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + res = self._download_webpage_handle( + url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query, expected_status=expected_status) + if res is False: + return res + content, urlh = res + return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh + + def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + if self.get_param('load_pages'): + url_or_request = self._create_request(url_or_request, data, headers, query) + filename = self._request_dump_filename(url_or_request.full_url, video_id) + self.to_screen(f'Loading request from {filename}') + try: + with open(filename, 'rb') as dumpf: + webpage_bytes = dumpf.read() + except OSError as e: + self.report_warning(f'Unable to load request from disk: {e}') + else: + content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers) + return parse(self, content, video_id, transform_source, fatal) + kwargs = { + 'note': note, + 'errnote': errnote, + 'transform_source': transform_source, + 'fatal': fatal, + 'encoding': encoding, + 'data': data, + 'headers': headers, + 'query': query, + 'expected_status': expected_status, + } + if parser is None: + kwargs.pop('transform_source') + # The method is fetched by name so subclasses can override _download_..._handle + res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs) + return res if res is False else res[0] + + def impersonate(func, name, return_value): + func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}' + func.__doc__ = f''' + @param transform_source Apply this transformation before parsing + @returns {return_value} + + See _download_webpage_handle docstring for other arguments specification + ''' + + impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)') + impersonate(download_content, f'_download_{name}', f'{return_value}') + return download_handle, download_content + + _download_xml_handle, _download_xml = __create_download_methods( + 'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element') + _download_json_handle, _download_json = __create_download_methods( + 'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict') + _download_socket_json_handle, _download_socket_json = __create_download_methods( + 'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict') + __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1] - See _download_webpage docstring for arguments specification. 
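The removed `_download_xml*`/`_download_json*`/`_download_socket_json*` boilerplate is now generated by the `__create_download_methods` factory above. A condensed sketch of the idea (names simplified; `impersonate` and the keyword plumbing omitted):

```python
def make_download_methods(name, parser_name, note):
    # Build both the *_handle and plain variants for one content type.
    def download_handle(self, url, video_id, **kwargs):
        res = self._download_webpage_handle(url, video_id, note=note, **kwargs)
        if res is False:
            return res
        content, urlh = res
        # The parser is looked up by name so subclasses can override it.
        parsed = getattr(self, parser_name)(content, video_id) if parser_name else content
        return parsed, urlh

    def download_content(self, url, video_id, **kwargs):
        # Fetched by name so subclasses can override _download_..._handle too.
        res = getattr(self, download_handle.__name__)(url, video_id, **kwargs)
        return res if res is False else res[0]

    download_handle.__name__ = f'_download_{name}_handle'
    download_content.__name__ = f'_download_{name}'
    return download_handle, download_content
```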
- """ - res = self._download_webpage_handle( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query, - expected_status=expected_status) - if res is False: - return res - webpage, urlh = res - return self._parse_socket_response_as_json( - webpage, video_id, transform_source=transform_source, - fatal=fatal), urlh - - def _download_socket_json( - self, url_or_request, video_id, note='Polling socket', - errnote='Unable to poll socket', transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}, - expected_status=None): + def _download_webpage( + self, url_or_request, video_id, note=None, errnote=None, + fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs): """ - Return the JSON object as a dict. + Return the data of the page as a string. - See _download_webpage docstring for arguments specification. + Keyword arguments: + tries -- number of tries + timeout -- sleep interval between tries + + See _download_webpage_handle docstring for other arguments specification. """ - res = self._download_socket_json_handle( - url_or_request, video_id, note=note, errnote=errnote, - transform_source=transform_source, fatal=fatal, encoding=encoding, - data=data, headers=headers, query=query, - expected_status=expected_status) - return res if res is False else res[0] + + R''' # NB: These are unused; should they be deprecated? + if tries != 1: + self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage') + if timeout is NO_DEFAULT: + timeout = 5 + else: + self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage') + ''' + + try_count = 0 + while True: + try: + return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs) + except http.client.IncompleteRead as e: + try_count += 1 + if try_count >= tries: + raise e + self._sleep(timeout, video_id) def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs): - idstr = format_field(video_id, template='%s: ') + idstr = format_field(video_id, None, '%s: ') msg = f'[{self.IE_NAME}] {idstr}{msg}' if only_once: if f'WARNING: {msg}' in self._printed_messages: @@ -1136,7 +1101,7 @@ class InfoExtractor: self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) return - msg += format_field(self._login_hint(method), template='. %s') + msg += format_field(self._login_hint(method), None, '. 
%s') raise ExtractorError(msg, expected=True) def raise_geo_restricted( @@ -1228,6 +1193,33 @@ class InfoExtractor: self.report_warning('unable to extract %s' % _name + bug_reports_message()) return None + def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='', + contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs): + """Searches string for the JSON object specified by start_pattern""" + # NB: end_pattern is only used to reduce the size of the initial match + if default is NO_DEFAULT: + default, has_default = {}, False + else: + fatal, has_default = False, True + + json_string = self._search_regex( + rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}', + string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT) + if not json_string: + return default + + _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS) + try: + return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs) + except ExtractorError as e: + if fatal: + raise ExtractorError( + f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id) + elif not has_default: + self.report_warning( + f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id) + return default + def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None): """ Like _search_regex, but strips HTML tags and unescapes entities. @@ -1292,7 +1284,7 @@ class InfoExtractor: if tfa is not None: return tfa - return compat_getpass('Type %s and press [Return]: ' % note) + return getpass.getpass('Type %s and press [Return]: ' % note) # Helper functions for extracting OpenGraph info @staticmethod @@ -1343,7 +1335,7 @@ class InfoExtractor: return self._og_search_property('url', html, **kargs) def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs): - return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs) + return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs) def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): name = variadic(name) @@ -1400,27 +1392,25 @@ class InfoExtractor: return self._html_search_meta('twitter:player', html, 'twitter card player') - def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): - json_ld_list = list(re.finditer(JSON_LD_RE, html)) - default = kwargs.get('default', NO_DEFAULT) - # JSON-LD may be malformed and thus `fatal` should be respected. - # At the same time `default` may be passed that assumes `fatal=False` - # for _search_regex. Let's simulate the same behavior here as well. 
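Worth noting in `_search_json` above: passing an explicit `default` forces `fatal=False`, while omitting it keeps `fatal` as supplied and only substitutes `{}` when the lookup is non-fatal. A tiny sketch of that flag resolution:

```python
from yt_dlp.utils import NO_DEFAULT


def resolve_flags(default=NO_DEFAULT, fatal=True):
    # Mirrors the first lines of _search_json above
    if default is NO_DEFAULT:
        default, has_default = {}, False
    else:
        fatal, has_default = False, True
    return default, fatal, has_default


print(resolve_flags())              # ({}, True, False): misses raise
print(resolve_flags(default=None))  # (None, False, True): misses return None
print(resolve_flags(fatal=False))   # ({}, False, False): misses warn, return {}
```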
- fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False - json_ld = [] - for mobj in json_ld_list: - json_ld_item = self._parse_json( - mobj.group('json_ld'), video_id, fatal=fatal) - if not json_ld_item: - continue - if isinstance(json_ld_item, dict): - json_ld.append(json_ld_item) - elif isinstance(json_ld_item, (list, tuple)): - json_ld.extend(json_ld_item) - if json_ld: - json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) - if json_ld: - return json_ld + def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT): + """Yield all json ld objects in the html""" + if default is not NO_DEFAULT: + fatal = False + for mobj in re.finditer(JSON_LD_RE, html): + json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal) + for json_ld in variadic(json_ld_item): + if isinstance(json_ld, dict): + yield json_ld + + def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT): + """Search for a video in any json ld in the html""" + if default is not NO_DEFAULT: + fatal = False + info = self._json_ld( + list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)), + video_id, fatal=fatal, expected_type=expected_type) + if info: + return info if default is not NO_DEFAULT: return default elif fatal: @@ -1430,7 +1420,7 @@ class InfoExtractor: return {} def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): - if isinstance(json_ld, compat_str): + if isinstance(json_ld, str): json_ld = self._parse_json(json_ld, video_id, fatal=fatal) if not json_ld: return {} @@ -1451,6 +1441,10 @@ class InfoExtractor: 'ViewAction': 'view', } + def is_type(e, *expected_types): + type = variadic(traverse_obj(e, '@type')) + return any(x in type for x in expected_types) + def extract_interaction_type(e): interaction_type = e.get('interactionType') if isinstance(interaction_type, dict): @@ -1464,9 +1458,7 @@ class InfoExtractor: if not isinstance(interaction_statistic, list): return for is_e in interaction_statistic: - if not isinstance(is_e, dict): - continue - if is_e.get('@type') != 'InteractionCounter': + if not is_type(is_e, 'InteractionCounter'): continue interaction_type = extract_interaction_type(is_e) if not interaction_type: @@ -1503,22 +1495,23 @@ class InfoExtractor: info['chapters'] = chapters def extract_video_object(e): - assert e['@type'] == 'VideoObject' + assert is_type(e, 'VideoObject') author = e.get('author') info.update({ 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnails': [{'url': url_or_none(url)} - for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))], + 'thumbnails': [{'url': url} + for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) + if url_or_none(url)], 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), # author can be an instance of 'Organization' or 'Person' types. # both types can have 'name' property(inherited from 'Thing' type). [1] # however some websites are using 'Text' type instead. # 1. 
https://schema.org/VideoObject - 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None, - 'filesize': float_or_none(e.get('contentSize')), + 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None, + 'filesize': int_or_none(float_or_none(e.get('contentSize'))), 'tbr': int_or_none(e.get('bitrate')), 'width': int_or_none(e.get('width')), 'height': int_or_none(e.get('height')), @@ -1534,13 +1527,12 @@ class InfoExtractor: if at_top_level and set(e.keys()) == {'@context', '@graph'}: traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False) break - item_type = e.get('@type') - if expected_type is not None and expected_type != item_type: + if expected_type is not None and not is_type(e, expected_type): continue rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none) if rating is not None: info['average_rating'] = rating - if item_type in ('TVEpisode', 'Episode'): + if is_type(e, 'TVEpisode', 'Episode'): episode_name = unescapeHTML(e.get('name')) info.update({ 'episode': episode_name, @@ -1550,37 +1542,39 @@ class InfoExtractor: if not info.get('title') and episode_name: info['title'] = episode_name part_of_season = e.get('partOfSeason') - if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): + if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'): info.update({ 'season': unescapeHTML(part_of_season.get('name')), 'season_number': int_or_none(part_of_season.get('seasonNumber')), }) part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') - if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): + if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) - elif item_type == 'Movie': + elif is_type(e, 'Movie'): info.update({ 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('dateCreated')), }) - elif item_type in ('Article', 'NewsArticle'): + elif is_type(e, 'Article', 'NewsArticle'): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), 'title': unescapeHTML(e.get('headline')), 'description': unescapeHTML(e.get('articleBody') or e.get('description')), }) - if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject': + if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'): extract_video_object(e['video'][0]) - elif item_type == 'VideoObject': + elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'): + extract_video_object(e['subjectOf'][0]) + elif is_type(e, 'VideoObject'): extract_video_object(e) if expected_type is None: continue else: break video = e.get('video') - if isinstance(video, dict) and video.get('@type') == 'VideoObject': + if is_type(video, 'VideoObject'): extract_video_object(video) if expected_type is None: continue @@ -1597,15 +1591,13 @@ class InfoExtractor: webpage, 'next.js data', fatal=fatal, **kw), video_id, transform_source=transform_source, fatal=fatal) - def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'): - ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. 
''' - # not all website do this, but it can be changed - # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): + """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) + FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)' js, arg_keys, arg_vals = self._search_regex( - (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx, - r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx), - webpage, context_name, group=['js', 'arg_keys', 'arg_vals']) + (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'), + webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal) args = dict(zip(arg_keys.split(','), arg_vals.split(','))) @@ -1613,7 +1605,8 @@ class InfoExtractor: if val in ('undefined', 'void 0'): args[key] = 'null' - return self._parse_json(js_to_json(js, args), video_id)['data'][0] + ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) + return traverse_obj(ret, traverse) or {} @staticmethod def _hidden_inputs(html): @@ -2166,7 +2159,7 @@ class InfoExtractor: ]), m3u8_doc) def format_url(url): - return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) + return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) if self.get_param('hls_split_discontinuity', False): def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None): @@ -2539,7 +2532,7 @@ class InfoExtractor: }) continue - src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) + src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src) src_url = src_url.strip() if proto == 'm3u8' or src_ext == 'm3u8': @@ -2562,7 +2555,7 @@ class InfoExtractor: 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' 
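`_search_nuxt_data` above gains `fatal` and `traverse` keywords; the bilibili hunk earlier in this diff calls it with `fatal=False, traverse=None` to receive the whole `__initialState` object rather than `['data'][0]`. A usage sketch; the extractor and page are hypothetical, the keyword arguments come from this diff:

```python
from yt_dlp.extractor.common import InfoExtractor


class DemoNuxtIE(InfoExtractor):  # hypothetical extractor for illustration
    _VALID_URL = r'https?://demo\.example/watch/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Default behaviour: returns the parsed object traversed via ('data', 0)
        data = self._search_nuxt_data(webpage, video_id)
        # Non-fatal, untraversed variant, as used by the bilibili change above;
        # returns the whole parsed state object (or {} on failure)
        state = self._search_nuxt_data(
            webpage, video_id, '__initialState', fatal=False, traverse=None)
        return {'id': video_id, 'title': data.get('title') or state.get('title')}
```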
- f4m_url += compat_urllib_parse_urlencode(f4m_params) + f4m_url += urllib.parse.urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) elif src_ext == 'mpd': formats.extend(self._extract_mpd_formats( @@ -2803,13 +2796,18 @@ class InfoExtractor: mime_type = representation_attrib['mimeType'] content_type = representation_attrib.get('contentType', mime_type.split('/')[0]) - codecs = parse_codecs(representation_attrib.get('codecs', '')) + codec_str = representation_attrib.get('codecs', '') + # Some kind of binary subtitle found in some youtube livestreams + if mime_type == 'application/x-rawcc': + codecs = {'scodec': codec_str} + else: + codecs = parse_codecs(codec_str) if content_type not in ('video', 'audio', 'text'): if mime_type == 'image/jpeg': content_type = mime_type - elif codecs['vcodec'] != 'none': + elif codecs.get('vcodec', 'none') != 'none': content_type = 'video' - elif codecs['acodec'] != 'none': + elif codecs.get('acodec', 'none') != 'none': content_type = 'audio' elif codecs.get('scodec', 'none') != 'none': content_type = 'text' @@ -2827,7 +2825,7 @@ class InfoExtractor: if re.match(r'^https?://', base_url): break if mpd_base_url and base_url.startswith('/'): - base_url = compat_urlparse.urljoin(mpd_base_url, base_url) + base_url = urllib.parse.urljoin(mpd_base_url, base_url) elif mpd_base_url and not re.match(r'^https?://', base_url): if not mpd_base_url.endswith('/'): mpd_base_url += '/' @@ -3097,7 +3095,7 @@ class InfoExtractor: sampling_rate = int_or_none(track.get('SamplingRate')) track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern) - track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern) + track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern) fragments = [] fragment_ctx = { @@ -3116,7 +3114,7 @@ class InfoExtractor: fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat for _ in range(fragment_repeat): fragments.append({ - 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern), + 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern), 'duration': fragment_ctx['duration'] / stream_timescale, }) fragment_ctx['time'] += fragment_ctx['duration'] @@ -3184,7 +3182,8 @@ class InfoExtractor: return f return {} - def _media_formats(src, cur_media_type, type_info={}): + def _media_formats(src, cur_media_type, type_info=None): + type_info = type_info or {} full_url = absolute_url(src) ext = type_info.get('ext') or determine_ext(full_url) if ext == 'm3u8': @@ -3202,6 +3201,7 @@ class InfoExtractor: formats = [{ 'url': full_url, 'vcodec': 'none' if cur_media_type == 'audio' else None, + 'ext': ext, }] return is_plain_url, formats @@ -3228,7 +3228,8 @@ class InfoExtractor: media_attributes = extract_attributes(media_tag) src = strip_or_none(media_attributes.get('src')) if src: - _, formats = _media_formats(src, media_type) + f = parse_content_type(media_attributes.get('type')) + _, formats = _media_formats(src, media_type, f) media_info['formats'].extend(formats) media_info['thumbnail'] = absolute_url(media_attributes.get('poster')) if media_content: @@ -3357,7 +3358,7 @@ class InfoExtractor: return formats, subtitles def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): - query = compat_urlparse.urlparse(url).query + query = urllib.parse.urlparse(url).query url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', 
url) mobj = re.search( r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url) @@ -3463,7 +3464,7 @@ class InfoExtractor: if not isinstance(track, dict): continue track_kind = track.get('kind') - if not track_kind or not isinstance(track_kind, compat_str): + if not track_kind or not isinstance(track_kind, str): continue if track_kind.lower() not in ('captions', 'subtitles'): continue @@ -3536,7 +3537,7 @@ class InfoExtractor: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''), + r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), 'height', default=None)) a_format = { 'url': source_url, @@ -3588,17 +3589,15 @@ class InfoExtractor: def _set_cookie(self, domain, name, value, expire_time=None, port=None, path='/', secure=False, discard=False, rest={}, **kwargs): - cookie = compat_cookiejar_Cookie( + cookie = http.cookiejar.Cookie( 0, name, value, port, port is not None, domain, True, domain.startswith('.'), path, True, secure, expire_time, discard, None, None, rest) - self._downloader.cookiejar.set_cookie(cookie) + self.cookiejar.set_cookie(cookie) def _get_cookies(self, url): - """ Return a compat_cookies_SimpleCookie with the cookies for the url """ - req = sanitized_Request(url) - self._downloader.cookiejar.add_cookie_header(req) - return compat_cookies_SimpleCookie(req.get_header('Cookie')) + """ Return a http.cookies.SimpleCookie with the cookies for the url """ + return http.cookies.SimpleCookie(self._downloader._calc_cookies(url)) def _apply_first_set_cookie_header(self, url_handle, cookie): """ @@ -3742,7 +3741,7 @@ class InfoExtractor: def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') - @property + @functools.cached_property def _cookies_passed(self): """Whether cookies have been passed to YoutubeDL""" return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None @@ -3764,10 +3763,10 @@ class InfoExtractor: return headers def _generic_id(self, url): - return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) def _generic_title(self, url): - return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + return urllib.parse.unquote(os.path.splitext(url_basename(url))[0]) @staticmethod def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None): diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py index e8f19b9e0..2f93e8ea5 100644 --- a/yt_dlp/extractor/commonprotocols.py +++ b/yt_dlp/extractor/commonprotocols.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse class RtmpIE(InfoExtractor): @@ -23,7 +24,7 @@ class RtmpIE(InfoExtractor): 'formats': [{ 'url': url, 'ext': 'flv', - 'format_id': compat_urlparse.urlparse(url).scheme, + 'format_id': urllib.parse.urlparse(url).scheme, }], } diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index bb1dbbaad..6877e1a3f 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -1,19 +1,20 @@ import base64 -import re import json -import zlib - +import re +import urllib.request import xml.etree.ElementTree +import zlib from hashlib import sha1 
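`_cookies_passed` above moves from `@property` to `@functools.cached_property`, so the parameter lookup runs once per instance instead of on every access. A small self-contained sketch of the behavioural difference (hypothetical `Extractor` class, not the real one):

import functools

class Extractor:
    lookups = 0

    @functools.cached_property
    def cookies_passed(self):  # stand-in for _cookies_passed
        Extractor.lookups += 1
        return True            # imagine the get_param('cookiefile') checks here

e = Extractor()
assert e.cookies_passed and e.cookies_passed  # second access served from __dict__
assert Extractor.lookups == 1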
-from math import pow, sqrt, floor +from math import floor, pow, sqrt + from .common import InfoExtractor from .vrv import VRVBaseIE +from ..aes import aes_cbc_decrypt from ..compat import ( compat_b64decode, compat_etree_fromstring, compat_str, compat_urllib_parse_urlencode, - compat_urllib_request, compat_urlparse, ) from ..utils import ( @@ -22,8 +23,8 @@ from ..utils import ( extract_attributes, float_or_none, format_field, - intlist_to_bytes, int_or_none, + intlist_to_bytes, join_nonempty, lowercase_escape, merge_dicts, @@ -34,9 +35,6 @@ from ..utils import ( try_get, xpath_text, ) -from ..aes import ( - aes_cbc_decrypt, -) class CrunchyrollBaseIE(InfoExtractor): @@ -259,7 +257,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE): } def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + request = (url_or_request if isinstance(url_or_request, urllib.request.Request) else sanitized_Request(url_or_request)) # Accept-Language must be set explicitly to accept any language to avoid issues # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. @@ -728,11 +726,12 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE): headers={ 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] }) - bucket = policy_response['cms']['bucket'] + cms = traverse_obj(policy_response, 'cms_beta', 'cms') + bucket = cms['bucket'] params = { - 'Policy': policy_response['cms']['policy'], - 'Signature': policy_response['cms']['signature'], - 'Key-Pair-Id': policy_response['cms']['key_pair_id'] + 'Policy': cms['policy'], + 'Signature': cms['signature'], + 'Key-Pair-Id': cms['key_pair_id'] } locale = traverse_obj(initial_state, ('localization', 'locale')) if locale: diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 5b76b29ff..a105b6ce2 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -1,12 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ( - int_or_none, - urlencode_postdata, - compat_str, - ExtractorError, -) +from ..compat import compat_str +from ..utils import ExtractorError, int_or_none, urlencode_postdata class CuriosityStreamBaseIE(InfoExtractor): @@ -23,6 +19,11 @@ class CuriosityStreamBaseIE(InfoExtractor): def _call_api(self, path, video_id, query=None): headers = {} + if not self._auth_token: + auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token') + if auth_cookie: + self.write_debug('Obtained auth_token cookie') + self._auth_token = auth_cookie.value if self._auth_token: headers['X-Auth-Token'] = self._auth_token result = self._download_json( @@ -45,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)' _TESTS = [{ - 'url': 'https://app.curiositystream.com/video/2', + 'url': 'http://app.curiositystream.com/video/2', 'info_dict': { 'id': '2', 'ext': 'mp4', diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 07239f39c..9b83264ee 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -91,4 +91,5 @@ class CWTVIE(InfoExtractor): 'timestamp': parse_iso8601(video_data.get('start_time')), 'age_limit': parse_age_limit(video_data.get('rating')), 'ie_key': 'ThePlatform', + 'thumbnail': video_data.get('large_thumbnail') } diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index 
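The Crunchyroll hunk above reads the CMS policy from `cms_beta` when present, falling back to `cms`. `traverse_obj` with several bare paths returns the first one that resolves; a simplified stand-in (`first_key` is hypothetical, not the real `yt_dlp.utils.traverse_obj`):

def first_key(obj, *keys):
    # reduced model of traverse_obj's alternative paths:
    # return the value of the first key that is not None
    for key in keys:
        value = obj.get(key)
        if value is not None:
            return value

policy_response = {'cms': {'bucket': '/cms/v2'}}        # regular account
beta_response = {'cms_beta': {'bucket': '/cms/beta'}}   # beta account
assert first_key(policy_response, 'cms_beta', 'cms')['bucket'] == '/cms/v2'
assert first_key(beta_response, 'cms_beta', 'cms')['bucket'] == '/cms/beta'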
3b090d5e0..46438891f 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -5,13 +5,15 @@ import re from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, + OnDemandPagedList, age_restricted, clean_html, - ExtractorError, int_or_none, - OnDemandPagedList, + traverse_obj, try_get, unescapeHTML, + unsmuggle_url, urlencode_postdata, ) @@ -220,6 +222,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): return urls def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url) video_id, playlist_id = self._match_valid_url(url).groups() if playlist_id: @@ -252,7 +255,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): metadata = self._download_json( 'https://www.dailymotion.com/player/metadata/video/' + xid, xid, 'Downloading metadata JSON', - query={'app': 'com.dailymotion.neon'}) + query=traverse_obj(smuggled_data, 'query') or {'app': 'com.dailymotion.neon'}) error = metadata.get('error') if error: diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py new file mode 100644 index 000000000..1f27797ad --- /dev/null +++ b/yt_dlp/extractor/dailywire.py @@ -0,0 +1,114 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + float_or_none, + join_nonempty, + traverse_obj, + url_or_none, +) + + +class DailyWireBaseIE(InfoExtractor): + _JSON_PATH = { + 'episode': ('props', 'pageProps', 'episodeData', 'episode'), + 'videos': ('props', 'pageProps', 'videoData', 'video'), + 'podcasts': ('props', 'pageProps', 'episode'), + } + + def _get_json(self, url): + sites_type, slug = self._match_valid_url(url).group('sites_type', 'id') + json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug) + return slug, traverse_obj(json_data, self._JSON_PATH[sites_type]) + + +class DailyWireIE(DailyWireBaseIE): + _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.dailywire.com/episode/1-fauci', + 'info_dict': { + 'id': 'ckzsl50xnqpy30850in3v4bu7', + 'ext': 'mp4', + 'display_id': '1-fauci', + 'title': '1. Fauci', + 'description': 'md5:9df630347ef85081b7e97dd30bc22853', + 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg', + 'creator': 'Caroline Roberts', + 'series_id': 'ckzplm0a097fn0826r2vc3j7h', + 'series': 'China: The Enemy Within', + } + }, { + 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher', + 'info_dict': { + 'id': 'cl0ngbaalplc80894sfdo9edf', + 'ext': 'mp3', + 'display_id': 'ep-124-bill-maher', + 'title': 'Ep. 
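In the Dailymotion hunk above, an embedding extractor can smuggle a replacement metadata `query` into the URL; `unsmuggle_url` recovers it, and the extractor falls back to the stock `app` parameter when nothing was smuggled. The selection logic, reduced to a testable function (`choose_query` is hypothetical, mirroring the `traverse_obj(smuggled_data, 'query') or {...}` expression):

def choose_query(smuggled_data):
    # embedders may smuggle a replacement query; otherwise use the default app id
    return (smuggled_data or {}).get('query') or {'app': 'com.dailymotion.neon'}

assert choose_query(None) == {'app': 'com.dailymotion.neon'}
assert choose_query({'query': {'app': 'com.example'}}) == {'app': 'com.example'}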
124 - Bill Maher', + 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg', + 'creator': 'Caroline Roberts', + 'description': 'md5:adb0de584bcfa9c41374999d9e324e98', + 'series_id': 'cjzvep7270hp00786l9hwccob', + 'series': 'The Sunday Special', + } + }, { + 'url': 'https://www.dailywire.com/videos/the-hyperions', + 'only_matching': True, + }] + + def _real_extract(self, url): + slug, episode_info = self._get_json(url) + urls = traverse_obj( + episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none) + + formats, subtitles = [], {} + for url in urls: + if determine_ext(url) != 'm3u8': + formats.append({'url': url}) + continue + format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug) + formats.extend(format_) + self._merge_subtitles(subs_, target=subtitles) + self._sort_formats(formats) + return { + 'id': episode_info['id'], + 'display_id': slug, + 'title': traverse_obj(episode_info, 'title', 'name'), + 'description': episode_info.get('description'), + 'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '), + 'duration': float_or_none(episode_info.get('duration')), + 'is_live': episode_info.get('isLive'), + 'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none), + 'formats': formats, + 'subtitles': subtitles, + 'series_id': traverse_obj(episode_info, ('show', 'id')), + 'series': traverse_obj(episode_info, ('show', 'name')), + } + + +class DailyWirePodcastIE(DailyWireBaseIE): + _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))' + _TESTS = [{ + 'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22', + 'info_dict': { + 'id': 'cl4f01d0w8pbe0a98ydd0cfn1', + 'ext': 'm4a', + 'display_id': 'get-ready-for-recession-6-15-22', + 'title': 'Get Ready for Recession | 6.15.22', + 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634', + 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg', + 'duration': 900.117667, + } + }] + + def _real_extract(self, url): + slug, episode_info = self._get_json(url) + audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp') + + return { + 'id': episode_info['id'], + 'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a', + 'display_id': slug, + 'title': episode_info.get('title'), + 'duration': float_or_none(episode_info.get('duration')), + 'thumbnail': episode_info.get('thumbnail'), + 'description': episode_info.get('description'), + } diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index c891ad0a6..3813a51fe 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -86,7 +86,7 @@ class DigitalConcertHallIE(InfoExtractor): }) m3u8_url = traverse_obj( - stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False) + stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False) formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False) self._sort_formats(formats) diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 6ac0c713a..0d12513b2 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -53,8 +53,8 @@ class 
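`DailyWireIE._real_extract` above mixes progressive files and HLS manifests from the same URL list, accumulating subtitle tracks per language as it goes. A reduced, runnable model of that loop — the `extract_m3u8` callable stands in for `_extract_m3u8_formats_and_subtitles`, the URLs are placeholders, and the `setdefault`/`extend` is a simplification of `_merge_subtitles`:

def classify(urls, extract_m3u8):
    formats, subtitles = [], {}
    for media_url in urls:
        if not media_url.endswith('.m3u8'):  # simplified determine_ext() check
            formats.append({'url': media_url})  # progressive file, use as-is
            continue
        fmts, subs = extract_m3u8(media_url)
        formats.extend(fmts)
        for lang, tracks in subs.items():
            subtitles.setdefault(lang, []).extend(tracks)
    return formats, subtitles

fmts, subs = classify(
    ['https://cdn.example/ep.mp3', 'https://cdn.example/ep.m3u8'],
    lambda u: ([{'url': u, 'ext': 'mp4'}], {'en': [{'url': u + '.vtt'}]}))
assert len(fmts) == 2 and 'en' in subs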
DropboxIE(InfoExtractor): else: raise ExtractorError('Password protected video, use --video-password <password>', expected=True) - json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON') - info_json = self._parse_json(json_string, video_id).get('props') + info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id, + contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props'] transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False) formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 475825eb8..e280b1c9f 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( - clean_html, ExtractorError, + clean_html, get_element_by_class, get_element_by_id, get_elements_by_class, @@ -96,11 +96,12 @@ class DropoutIE(InfoExtractor): def _login(self, display_id): username, password = self._get_login_info() - if not (username and password): - self.raise_login_required(method='password') + if not username: + return True response = self._download_webpage( - self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({ + self._LOGIN_URL, display_id, note='Logging in', fatal=False, + data=urlencode_postdata({ 'email': username, 'password': password, 'authenticity_token': self._get_authenticity_token(display_id), @@ -110,19 +111,25 @@ class DropoutIE(InfoExtractor): user_has_subscription = self._search_regex( r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') if user_has_subscription.lower() == 'true': - return response + return elif user_has_subscription.lower() == 'false': - raise ExtractorError('Account is not subscribed') + return 'Account is not subscribed' else: - raise ExtractorError('Incorrect username/password') + return 'Incorrect username/password' def _real_extract(self, url): display_id = self._match_id(url) - try: - self._login(display_id) - webpage = self._download_webpage(url, display_id, note='Downloading video webpage') - finally: - self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False) + + webpage = None + if self._get_cookies('https://www.dropout.tv').get('_session'): + webpage = self._download_webpage(url, display_id) + if not webpage or '<div id="watch-unauthorized"' in webpage: + login_err = self._login(display_id) + webpage = self._download_webpage(url, display_id) + if login_err and '<div id="watch-unauthorized"' in webpage: + if login_err is True: + self.raise_login_required(method='any') + raise ExtractorError(login_err, expected=True) embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') thumbnail = self._og_search_thumbnail(webpage) @@ -137,7 +144,7 @@ class DropoutIE(InfoExtractor): return { '_type': 'url_transparent', 'ie_key': VHXEmbedIE.ie_key(), - 'url': embed_url, + 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'), 'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'), 'display_id': display_id, 'title': title, diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index 24403842d..fb0546cae 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -51,31 +51,39 @@ def 
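The Dropout rewrite above changes `_login` from raising to returning an error string (or `True` when no credentials were supplied), so `_real_extract` can first try an existing `_session` cookie and only raise if the page still shows the `watch-unauthorized` marker after a login attempt. The control flow, modelled with stub callables — none of these names are yt-dlp API, and `PermissionError` stands in for `ExtractorError`:

def extract(fetch, login, has_session_cookie):
    webpage = fetch() if has_session_cookie else None
    if not webpage or '<div id="watch-unauthorized"' in webpage:
        login_err = login()   # None on success, True if no credentials,
        webpage = fetch()     # or an error string to surface later
        if login_err and '<div id="watch-unauthorized"' in webpage:
            if login_err is True:
                raise PermissionError('login required (cookies or password)')
            raise PermissionError(login_err)
    return webpage

# cookie path: no login round-trip at all
assert extract(lambda: '<video>', lambda: None, True) == '<video>'
# no cookie, successful login
assert extract(lambda: '<video>', lambda: None, False) == '<video>'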
_get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, e class DubokuIE(InfoExtractor): IE_NAME = 'duboku' - IE_DESC = 'www.duboku.co' + IE_DESC = 'www.duboku.io' - _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*' + _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*' _TESTS = [{ - 'url': 'https://www.duboku.co/vodplay/1575-1-1.html', + 'url': 'https://w.duboku.io/vodplay/1575-1-1.html', 'info_dict': { 'id': '1575-1-1', - 'ext': 'ts', + 'ext': 'mp4', 'series': '白色月光', 'title': 'contains:白色月光', 'season_number': 1, 'episode_number': 1, + 'season': 'Season 1', + 'episode_id': '1', + 'season_id': '1', + 'episode': 'Episode 1', }, 'params': { 'skip_download': 'm3u8 download', }, }, { - 'url': 'https://www.duboku.co/vodplay/1588-1-1.html', + 'url': 'https://w.duboku.io/vodplay/1588-1-1.html', 'info_dict': { 'id': '1588-1-1', - 'ext': 'ts', + 'ext': 'mp4', 'series': '亲爱的自己', - 'title': 'contains:预告片', + 'title': 'contains:第1集', 'season_number': 1, 'episode_number': 1, + 'episode': 'Episode 1', + 'season': 'Season 1', + 'episode_id': '1', + 'season_id': '1', }, 'params': { 'skip_download': 'm3u8 download', @@ -91,7 +99,7 @@ class DubokuIE(InfoExtractor): season_id = temp[1] episode_id = temp[2] - webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id + webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id webpage_html = self._download_webpage(webpage_url, video_id) # extract video url @@ -124,12 +132,13 @@ class DubokuIE(InfoExtractor): data_from = player_data.get('from') # if it is an embedded iframe, maybe it's an external source + headers = {'Referer': webpage_url} if data_from == 'iframe': # use _type url_transparent to retain the meaningful details # of the video. 
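The duboku hunk above builds one `headers = {'Referer': webpage_url}` dict and threads it through every path: the iframe hand-off smuggles it as `http_headers` inside the URL, and the direct HLS path passes it both to the manifest fetch and to the final info dict so fragment downloads replay it. A simplified model of the smuggling half (the `#__smuggle=` marker is shortened from yt-dlp's `#__youtubedl_smuggle=`, same idea):

import json
import urllib.parse

def smuggle_url(url, data):
    # JSON payload carried in the URL fragment, recovered by the next extractor
    return url + '#__smuggle=' + urllib.parse.quote(json.dumps(data))

webpage_url = 'https://w.duboku.io/vodplay/1575-1-1.html'
headers = {'Referer': webpage_url}
embed = smuggle_url('https://player.example/embed/abc', {'http_headers': headers})

payload = json.loads(urllib.parse.unquote(embed.split('#__smuggle=')[1]))
assert payload['http_headers']['Referer'] == webpage_url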
return { '_type': 'url_transparent', - 'url': smuggle_url(data_url, {'http_headers': {'Referer': webpage_url}}), + 'url': smuggle_url(data_url, {'http_headers': headers}), 'id': video_id, 'title': title, 'series': series_title, @@ -139,7 +148,7 @@ class DubokuIE(InfoExtractor): 'episode_id': episode_id, } - formats = self._extract_m3u8_formats(data_url, video_id, 'mp4') + formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers) return { 'id': video_id, @@ -150,36 +159,29 @@ class DubokuIE(InfoExtractor): 'episode_number': int_or_none(episode_id), 'episode_id': episode_id, 'formats': formats, - 'http_headers': {'Referer': 'https://www.duboku.co/static/player/videojs.html'} + 'http_headers': headers } class DubokuPlaylistIE(InfoExtractor): IE_NAME = 'duboku:list' - IE_DESC = 'www.duboku.co entire series' + IE_DESC = 'www.duboku.io entire series' - _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/voddetail/)(?P<id>[0-9]+)\.html.*' + _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*' _TESTS = [{ - 'url': 'https://www.duboku.co/voddetail/1575.html', + 'url': 'https://w.duboku.io/voddetail/1575.html', 'info_dict': { 'id': 'startswith:1575', 'title': '白色月光', }, 'playlist_count': 12, }, { - 'url': 'https://www.duboku.co/voddetail/1554.html', + 'url': 'https://w.duboku.io/voddetail/1554.html', 'info_dict': { 'id': 'startswith:1554', 'title': '以家人之名', }, 'playlist_mincount': 30, - }, { - 'url': 'https://www.duboku.co/voddetail/1554.html#playlist2', - 'info_dict': { - 'id': '1554#playlist2', - 'title': '以家人之名', - }, - 'playlist_mincount': 27, }] def _real_extract(self, url): @@ -189,7 +191,7 @@ class DubokuPlaylistIE(InfoExtractor): series_id = mobj.group('id') fragment = compat_urlparse.urlparse(url).fragment - webpage_url = 'https://www.duboku.co/voddetail/%s.html' % series_id + webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id webpage_html = self._download_webpage(webpage_url, series_id) # extract title @@ -234,6 +236,6 @@ class DubokuPlaylistIE(InfoExtractor): # return url results return self.playlist_result([ self.url_result( - compat_urlparse.urljoin('https://www.duboku.co', x['href']), + compat_urlparse.urljoin('https://w.duboku.io', x['href']), ie=DubokuIE.ie_key(), video_title=x.get('title')) for x in playlist], series_id + '#' + playlist_id, title) diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 507f0a5c1..276543653 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -119,7 +119,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE): class ERTFlixIE(ERTFlixBaseIE): IE_NAME = 'ertflix' IE_DESC = 'ERTFLIX videos' - _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)' + _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)' _TESTS = [{ 'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates', 'md5': '6479d5e60fd7e520b07ba5411dcdd6e7', @@ -171,6 +171,9 @@ class ERTFlixIE(ERTFlixBaseIE): 'title': 'Το δίκτυο', }, 'playlist_mincount': 9, + }, { + 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari', + 'only_matching': True, }] def _extract_episode(self, episode): diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index 8fad70e6b..451148636 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -1,8 +1,11 @@ +import base64 +import json import re +import urllib.parse +from .adobepass import AdobePassIE from .common import InfoExtractor from .once import 
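The ERTFlix change above only widens `_VALID_URL` with one optional path segment, which is exactly what lets the new localized test URL match. Verifiable directly:

import re

OLD = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
NEW = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'

url = 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari'
assert re.match(OLD, url) is None                    # old pattern misses the /en/ prefix
assert re.match(NEW, url).group('id') == 'vod.127652'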
OnceIE -from ..compat import compat_str from ..utils import ( determine_ext, dict_get, @@ -24,7 +27,6 @@ class ESPNIE(OnceIE): (?: (?: video/(?:clip|iframe/twitter)| - watch/player ) (?: .*?\?.*?\bid=| @@ -47,6 +49,8 @@ class ESPNIE(OnceIE): 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', 'timestamp': 1390936111, 'upload_date': '20140128', + 'duration': 1302, + 'thumbnail': r're:https://.+\.jpg', }, 'params': { 'skip_download': True, @@ -72,15 +76,6 @@ class ESPNIE(OnceIE): 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774', 'only_matching': True, }, { - 'url': 'http://www.espn.com/watch/player?id=19141491', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', - 'only_matching': True, - }, { - 'url': 'http://www.espn.com/watch/player/_/id/19141491', - 'only_matching': True, - }, { 'url': 'http://www.espn.com/video/clip?id=10365079', 'only_matching': True, }, { @@ -98,7 +93,13 @@ class ESPNIE(OnceIE): }, { 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', 'only_matching': True, - }] + }, { + 'url': 'http://www.espn.com/watch/player?id=19141491', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', + 'only_matching': True, + }, ] def _real_extract(self, url): video_id = self._match_id(url) @@ -116,7 +117,7 @@ class ESPNIE(OnceIE): for source_id, source in source.items(): if source_id == 'alert': continue - elif isinstance(source, compat_str): + elif isinstance(source, str): extract_source(source, base_source_id) elif isinstance(source, dict): traverse_source( @@ -196,7 +197,7 @@ class ESPNArticleIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) + return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url) def _real_extract(self, url): video_id = self._match_id(url) @@ -277,3 +278,119 @@ class ESPNCricInfoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class WatchESPNIE(AdobePassIE): + _VALID_URL = r'https://www.espn.com/watch/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})' + _TESTS = [{ + 'url': 'https://www.espn.com/watch/player/_/id/ba7d17da-453b-4697-bf92-76a99f61642b', + 'info_dict': { + 'id': 'ba7d17da-453b-4697-bf92-76a99f61642b', + 'ext': 'mp4', + 'title': 'Serbia vs. Turkey', + 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/ba7d17da-453b-4697-bf92-76a99f61642b/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.espn.com/watch/player/_/id/4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34', + 'info_dict': { + 'id': '4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34', + 'ext': 'mp4', + 'title': 'Real Madrid vs. 
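`_call_bamgrid_api` above picks the request-body encoding from the path: the OAuth-style `token` endpoint takes a form-encoded body, everything else JSON. That two-line dispatch, isolated into a hypothetical `encode_payload` wrapper:

import json
import urllib.parse

def encode_payload(path, payload):
    # /token is OAuth-style form-encoded; the other bamgrid endpoints take JSON
    parse = urllib.parse.urlencode if path == 'token' else json.dumps
    return parse(payload).encode()

assert encode_payload('token', {'grant_type': 'x'}) == b'grant_type=x'
assert encode_payload('devices', {'attributes': {}}) == b'{"attributes": {}}'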
Real Betis (LaLiga)', + 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS', + }, + 'params': { + 'skip_download': True, + }, + }] + + _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c' + + def _call_bamgrid_api(self, path, video_id, payload=None, headers={}): + if 'Authorization' not in headers: + headers['Authorization'] = f'Bearer {self._API_KEY}' + parse = urllib.parse.urlencode if path == 'token' else json.dumps + return self._download_json( + f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode()) + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}', + video_id)['playbackState'] + + # ESPN+ subscription required, through cookies + if 'DTC' in video_data.get('sourceId'): + cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token') + if not cookie: + self.raise_login_required(method='cookies') + + assertion = self._call_bamgrid_api( + 'devices', video_id, + headers={'Content-Type': 'application/json; charset=UTF-8'}, + payload={ + 'deviceFamily': 'android', + 'applicationRuntime': 'android', + 'deviceProfile': 'tv', + 'attributes': {}, + })['assertion'] + token = self._call_bamgrid_api( + 'token', video_id, payload={ + 'subject_token': assertion, + 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device', + 'platform': 'android', + 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange' + })['access_token'] + + assertion = self._call_bamgrid_api( + 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]}, + headers={ + 'Authorization': token, + 'Content-Type': 'application/json; charset=UTF-8' + })['assertion'] + token = self._call_bamgrid_api( + 'token', video_id, payload={ + 'subject_token': assertion, + 'subject_token_type': 'urn:bamtech:params:oauth:token-type:account', + 'platform': 'android', + 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange' + })['access_token'] + + playback = self._download_json( + video_data['videoHref'].format(scenario='browser~ssai'), video_id, + headers={ + 'Accept': 'application/vnd.media-service+json; version=5', + 'Authorization': token + }) + m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token} + + # No login required + elif video_data.get('sourceId') == 'ESPN_FREE': + asset = self._download_json( + f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb', + video_id) + m3u8_url, headers = asset['stream'], {} + + # TV Provider required + else: + resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None) + auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode() + + asset = self._download_json( + f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb', + video_id, data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode()) + m3u8_url, headers = asset['stream'], {} + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data.get('name'), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnail': 
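The ESPN+ branch above is a four-step BAMTech exchange: register a device to get an assertion, trade it for a device token, grant the account with the JWT taken from the `ESPN-ONESITE.WEB-PROD.token` cookie, then trade the account assertion for the access token used for playback. A stubbed model of just the sequencing — `call_api` is fake, and the full payloads, headers, and endpoints are in the diff itself:

def dtc_access_token(call_api, id_token):
    assertion = call_api('devices', {'deviceFamily': 'android'})['assertion']
    device_token = call_api('token', {
        'subject_token': assertion,
        'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
        'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange',
    })['access_token']
    assertion = call_api('accounts/grant', {'id_token': id_token},
                         auth=device_token)['assertion']
    return call_api('token', {
        'subject_token': assertion,
        'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
        'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange',
    }, auth=device_token)['access_token']

responses = {'devices': {'assertion': 'dev-a'}, 'accounts/grant': {'assertion': 'acct-a'}}
def fake_api(path, payload, auth=None):
    if path == 'token':
        return {'access_token': 'tok-for-' + payload['subject_token']}
    return responses[path]

assert dtc_access_token(fake_api, 'cookie-jwt') == 'tok-for-acct-a'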
video_data.get('posterHref'), + 'http_headers': headers, + } diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py index a1b8e9bc9..5aba21ba7 100644 --- a/yt_dlp/extractor/expressen.py +++ b/yt_dlp/extractor/expressen.py @@ -19,9 +19,10 @@ class ExpressenIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', - 'md5': '2fbbe3ca14392a6b1b36941858d33a45', + 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e', 'info_dict': { - 'id': '8690962', + 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136', + 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden', 'ext': 'mp4', 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden', 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba', @@ -64,7 +65,7 @@ class ExpressenIE(InfoExtractor): display_id, transform_source=unescapeHTML) info = extract_data('video-tracking-info') - video_id = info['videoId'] + video_id = info['contentId'] data = extract_data('article-data') stream = data['stream'] diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9c5a5f482..32818a024 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,2175 +1,23 @@ -# flake8: noqa: F401 +import contextlib +import os -from .abc import ( - ABCIE, - ABCIViewIE, - ABCIViewShowSeriesIE, -) -from .abcnews import ( - AbcNewsIE, - AbcNewsVideoIE, -) -from .abcotvs import ( - ABCOTVSIE, - ABCOTVSClipsIE, -) -from .abematv import ( - AbemaTVIE, - AbemaTVTitleIE, -) -from .academicearth import AcademicEarthCourseIE -from .acast import ( - ACastIE, - ACastChannelIE, -) -from .adn import ADNIE -from .adobeconnect import AdobeConnectIE -from .adobetv import ( - AdobeTVEmbedIE, - AdobeTVIE, - AdobeTVShowIE, - AdobeTVChannelIE, - AdobeTVVideoIE, -) -from .adultswim import AdultSwimIE -from .aenetworks import ( - AENetworksIE, - AENetworksCollectionIE, - AENetworksShowIE, - HistoryTopicIE, - HistoryPlayerIE, - BiographyIE, -) -from .afreecatv import ( - AfreecaTVIE, - AfreecaTVLiveIE, - AfreecaTVUserIE, -) -from .airmozilla import AirMozillaIE -from .aljazeera import AlJazeeraIE -from .alphaporno import AlphaPornoIE -from .amara import AmaraIE -from .alura import ( - AluraIE, - AluraCourseIE -) -from .amcnetworks import AMCNetworksIE -from .animelab import ( - AnimeLabIE, - AnimeLabShowsIE, -) -from .amazon import AmazonStoreIE -from .americastestkitchen import ( - AmericasTestKitchenIE, - AmericasTestKitchenSeasonIE, -) -from .animeondemand import AnimeOnDemandIE -from .anvato import AnvatoIE -from .aol import AolIE -from .allocine import AllocineIE -from .aliexpress import AliExpressLiveIE -from .alsace20tv import ( - Alsace20TVIE, - Alsace20TVEmbedIE, -) -from .apa import APAIE -from .aparat import AparatIE -from .appleconnect import AppleConnectIE -from .appletrailers import ( - AppleTrailersIE, - AppleTrailersSectionIE, -) -from .applepodcasts import ApplePodcastsIE -from .archiveorg import ( - ArchiveOrgIE, - YoutubeWebArchiveIE, -) -from .arcpublishing import ArcPublishingIE -from .arkena import ArkenaIE -from .ard import ( - ARDBetaMediathekIE, - ARDIE, - ARDMediathekIE, -) -from .arte import ( - ArteTVIE, - ArteTVEmbedIE, - ArteTVPlaylistIE, - ArteTVCategoryIE, -) -from .arnes import ArnesIE -from .asiancrush import ( - AsianCrushIE, - AsianCrushPlaylistIE, -) -from .atresplayer import AtresPlayerIE -from .atttechchannel import ATTTechChannelIE -from .atvat import 
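The rewritten `extractors.py` above keeps the old lazy-loading dance, now pointed at `_extractors.py`: try the build-time `lazy_extractors` module, and on `ImportError` fall back to importing everything eagerly and scanning the namespace, with `GenericIE` forced last because it is the matcher of last resort. A runnable miniature — `FooIE`/`GenericIE` are dummies, and the real scan runs over the imported extractor modules:

import contextlib

class FooIE: pass
class GenericIE: pass

_LAZY_LOADER = False
with contextlib.suppress(ImportError):
    from lazy_extractors import _ALL_CLASSES  # build-time artifact; absent here
    _LAZY_LOADER = True

if not _LAZY_LOADER:
    # eager path: pick up every *IE class in scope except GenericIE,
    # then append GenericIE so it is always tried last
    _ALL_CLASSES = [klass for name, klass in dict(globals()).items()
                    if name.endswith('IE') and name != 'GenericIE']
    _ALL_CLASSES.append(GenericIE)

assert _ALL_CLASSES[-1] is GenericIE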
ATVAtIE -from .audimedia import AudiMediaIE -from .audioboom import AudioBoomIE -from .audiomack import AudiomackIE, AudiomackAlbumIE -from .audius import ( - AudiusIE, - AudiusTrackIE, - AudiusPlaylistIE, - AudiusProfileIE, -) -from .awaan import ( - AWAANIE, - AWAANVideoIE, - AWAANLiveIE, - AWAANSeasonIE, -) -from .azmedien import AZMedienIE -from .baidu import BaiduVideoIE -from .banbye import ( - BanByeIE, - BanByeChannelIE, -) -from .bandaichannel import BandaiChannelIE -from .bandcamp import ( - BandcampIE, - BandcampAlbumIE, - BandcampWeeklyIE, - BandcampUserIE, -) -from .bannedvideo import BannedVideoIE -from .bbc import ( - BBCCoUkIE, - BBCCoUkArticleIE, - BBCCoUkIPlayerEpisodesIE, - BBCCoUkIPlayerGroupIE, - BBCCoUkPlaylistIE, - BBCIE, -) -from .beeg import BeegIE -from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE -from .beatport import BeatportIE -from .bet import BetIE -from .bfi import BFIPlayerIE -from .bfmtv import ( - BFMTVIE, - BFMTVLiveIE, - BFMTVArticleIE, -) -from .bibeltv import BibelTVIE -from .bigflix import BigflixIE -from .bigo import BigoIE -from .bild import BildIE -from .bilibili import ( - BiliBiliIE, - BiliBiliSearchIE, - BilibiliCategoryIE, - BiliBiliBangumiIE, - BilibiliAudioIE, - BilibiliAudioAlbumIE, - BiliBiliPlayerIE, - BilibiliChannelIE, - BiliIntlIE, - BiliIntlSeriesIE, - BiliLiveIE, -) -from .biobiochiletv import BioBioChileTVIE -from .bitchute import ( - BitChuteIE, - BitChuteChannelIE, -) -from .bitwave import ( - BitwaveReplayIE, - BitwaveStreamIE, -) -from .biqle import BIQLEIE -from .blackboardcollaborate import BlackboardCollaborateIE -from .bleacherreport import ( - BleacherReportIE, - BleacherReportCMSIE, -) -from .blogger import BloggerIE -from .bloomberg import BloombergIE -from .bokecc import BokeCCIE -from .bongacams import BongaCamsIE -from .bostonglobe import BostonGlobeIE -from .box import BoxIE -from .bpb import BpbIE -from .br import ( - BRIE, - BRMediathekIE, -) -from .bravotv import BravoTVIE -from .breakcom import BreakIE -from .breitbart import BreitBartIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .businessinsider import BusinessInsiderIE -from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE -from .c56 import C56IE -from .cableav import CableAVIE -from .callin import CallinIE -from .caltrans import CaltransIE -from .cam4 import CAM4IE -from .camdemy import ( - CamdemyIE, - CamdemyFolderIE -) -from .cammodels import CamModelsIE -from .camwithher import CamWithHerIE -from .canalalpha import CanalAlphaIE -from .canalplus import CanalplusIE -from .canalc2 import Canalc2IE -from .canvas import ( - CanvasIE, - CanvasEenIE, - VrtNUIE, - DagelijkseKostIE, -) -from .carambatv import ( - CarambaTVIE, - CarambaTVPageIE, -) -from .cartoonnetwork import CartoonNetworkIE -from .cbc import ( - CBCIE, - CBCPlayerIE, - CBCGemIE, - CBCGemPlaylistIE, - CBCGemLiveIE, -) -from .cbs import CBSIE -from .cbslocal import ( - CBSLocalIE, - CBSLocalArticleIE, -) -from .cbsinteractive import CBSInteractiveIE -from .cbsnews import ( - CBSNewsEmbedIE, - CBSNewsIE, - CBSNewsLiveVideoIE, -) -from .cbssports import ( - CBSSportsEmbedIE, - CBSSportsIE, - TwentyFourSevenSportsIE, -) -from .ccc import ( - CCCIE, - CCCPlaylistIE, -) -from .ccma import CCMAIE -from .cctv import CCTVIE -from .cda import CDAIE -from .ceskatelevize import CeskaTelevizeIE -from .cgtn import CGTNIE -from .channel9 import Channel9IE -from .charlierose import CharlieRoseIE -from .chaturbate import ChaturbateIE -from 
.chilloutzone import ChilloutzoneIE -from .chingari import ( - ChingariIE, - ChingariUserIE, -) -from .chirbit import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE -from .cinemax import CinemaxIE -from .ciscolive import ( - CiscoLiveSessionIE, - CiscoLiveSearchIE, -) -from .ciscowebex import CiscoWebexIE -from .cjsw import CJSWIE -from .cliphunter import CliphunterIE -from .clippit import ClippitIE -from .cliprs import ClipRsIE -from .clipsyndicate import ClipsyndicateIE -from .closertotruth import CloserToTruthIE -from .cloudflarestream import CloudflareStreamIE -from .cloudy import CloudyIE -from .clubic import ClubicIE -from .clyp import ClypIE -from .cmt import CMTIE -from .cnbc import ( - CNBCIE, - CNBCVideoIE, -) -from .cnn import ( - CNNIE, - CNNBlogsIE, - CNNArticleIE, -) -from .coub import CoubIE -from .comedycentral import ( - ComedyCentralIE, - ComedyCentralTVIE, -) -from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import ( - MmsIE, - RtmpIE, - ViewSourceIE, -) -from .condenast import CondeNastIE -from .contv import CONtvIE -from .corus import CorusIE -from .cpac import ( - CPACIE, - CPACPlaylistIE, -) -from .cozytv import CozyTVIE -from .cracked import CrackedIE -from .crackle import CrackleIE -from .craftsy import CraftsyIE -from .crooksandliars import CrooksAndLiarsIE -from .crowdbunker import ( - CrowdBunkerIE, - CrowdBunkerChannelIE, -) -from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE, - CrunchyrollBetaIE, - CrunchyrollBetaShowIE, -) -from .cspan import CSpanIE, CSpanCongressIE -from .ctsnews import CtsNewsIE -from .ctv import CTVIE -from .ctvnews import CTVNewsIE -from .cultureunplugged import CultureUnpluggedIE -from .curiositystream import ( - CuriosityStreamIE, - CuriosityStreamCollectionsIE, - CuriosityStreamSeriesIE, -) -from .cwtv import CWTVIE -from .cybrary import ( - CybraryIE, - CybraryCourseIE -) -from .daftsex import DaftsexIE -from .dailymail import DailyMailIE -from .dailymotion import ( - DailymotionIE, - DailymotionPlaylistIE, - DailymotionUserIE, -) -from .damtomo import ( - DamtomoRecordIE, - DamtomoVideoIE, -) -from .daum import ( - DaumIE, - DaumClipIE, - DaumPlaylistIE, - DaumUserIE, -) -from .daystar import DaystarClipIE -from .dbtv import DBTVIE -from .dctp import DctpTvIE -from .deezer import ( - DeezerPlaylistIE, - DeezerAlbumIE, -) -from .democracynow import DemocracynowIE -from .dfb import DFBIE -from .dhm import DHMIE -from .digg import DiggIE -from .dotsub import DotsubIE -from .douyutv import ( - DouyuShowIE, - DouyuTVIE, -) -from .dplay import ( - DPlayIE, - DiscoveryPlusIE, - HGTVDeIE, - GoDiscoveryIE, - TravelChannelIE, - CookingChannelIE, - HGTVUsaIE, - FoodNetworkIE, - InvestigationDiscoveryIE, - DestinationAmericaIE, - AmHistoryChannelIE, - ScienceChannelIE, - DIYNetworkIE, - DiscoveryLifeIE, - AnimalPlanetIE, - TLCIE, - DiscoveryPlusIndiaIE, - DiscoveryNetworksDeIE, - DiscoveryPlusItalyIE, - DiscoveryPlusItalyShowIE, - DiscoveryPlusIndiaShowIE, -) -from .dreisat import DreiSatIE -from .drbonanza import DRBonanzaIE -from .drtuber import DrTuberIE -from .drtv import ( - DRTVIE, - DRTVLiveIE, -) -from .dtube import DTubeIE -from .dvtv import DVTVIE -from .duboku import ( - DubokuIE, - DubokuPlaylistIE -) -from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE -from .digitalconcerthall import DigitalConcertHallIE -from .discovery import DiscoveryIE -from .disney import DisneyIE -from .dispeak import DigitallySpeakingIE -from .doodstream import 
DoodStreamIE -from .dropbox import DropboxIE -from .dropout import ( - DropoutSeasonIE, - DropoutIE -) -from .dw import ( - DWIE, - DWArticleIE, -) -from .eagleplatform import EaglePlatformIE -from .ebaumsworld import EbaumsWorldIE -from .echomsk import EchoMskIE -from .egghead import ( - EggheadCourseIE, - EggheadLessonIE, -) -from .ehow import EHowIE -from .eighttracks import EightTracksIE -from .einthusan import EinthusanIE -from .eitb import EitbIE -from .ellentube import ( - EllenTubeIE, - EllenTubeVideoIE, - EllenTubePlaylistIE, -) -from .elonet import ElonetIE -from .elpais import ElPaisIE -from .embedly import EmbedlyIE -from .engadget import EngadgetIE -from .epicon import ( - EpiconIE, - EpiconSeriesIE, -) -from .eporner import EpornerIE -from .eroprofile import ( - EroProfileIE, - EroProfileAlbumIE, -) -from .ertgr import ( - ERTFlixCodenameIE, - ERTFlixIE, - ERTWebtvEmbedIE, -) -from .escapist import EscapistIE -from .espn import ( - ESPNIE, - ESPNArticleIE, - FiveThirtyEightIE, - ESPNCricInfoIE, -) -from .esri import EsriVideoIE -from .europa import EuropaIE -from .europeantour import EuropeanTourIE -from .euscreen import EUScreenIE -from .expotv import ExpoTVIE -from .expressen import ExpressenIE -from .extremetube import ExtremeTubeIE -from .eyedotv import EyedoTVIE -from .facebook import ( - FacebookIE, - FacebookPluginsVideoIE, - FacebookRedirectURLIE, -) -from .fancode import ( - FancodeVodIE, - FancodeLiveIE -) +from ..utils import load_plugins -from .faz import FazIE -from .fc2 import ( - FC2IE, - FC2EmbedIE, - FC2LiveIE, -) -from .fczenit import FczenitIE -from .fifa import FifaIE -from .filmmodu import FilmmoduIE -from .filmon import ( - FilmOnIE, - FilmOnChannelIE, -) -from .filmweb import FilmwebIE -from .firsttv import FirstTVIE -from .fivetv import FiveTVIE -from .flickr import FlickrIE -from .folketinget import FolketingetIE -from .footyroom import FootyRoomIE -from .formula1 import Formula1IE -from .fourtube import ( - FourTubeIE, - PornTubeIE, - PornerBrosIE, - FuxIE, -) -from .fox import FOXIE -from .fox9 import ( - FOX9IE, - FOX9NewsIE, -) -from .foxgay import FoxgayIE -from .foxnews import ( - FoxNewsIE, - FoxNewsArticleIE, -) -from .foxsports import FoxSportsIE -from .fptplay import FptplayIE -from .franceculture import FranceCultureIE -from .franceinter import FranceInterIE -from .francetv import ( - FranceTVIE, - FranceTVSiteIE, - FranceTVInfoIE, -) -from .freesound import FreesoundIE -from .freespeech import FreespeechIE -from .frontendmasters import ( - FrontendMastersIE, - FrontendMastersLessonIE, - FrontendMastersCourseIE -) -from .fujitv import FujiTVFODPlus7IE -from .funimation import ( - FunimationIE, - FunimationPageIE, - FunimationShowIE, -) -from .funk import FunkIE -from .fusion import FusionIE -from .gab import ( - GabTVIE, - GabIE, -) -from .gaia import GaiaIE -from .gameinformer import GameInformerIE -from .gamejolt import ( - GameJoltIE, - GameJoltUserIE, - GameJoltGameIE, - GameJoltGameSoundtrackIE, - GameJoltCommunityIE, - GameJoltSearchIE, -) -from .gamespot import GameSpotIE -from .gamestar import GameStarIE -from .gaskrank import GaskrankIE -from .gazeta import GazetaIE -from .gdcvault import GDCVaultIE -from .gedidigital import GediDigitalIE -from .generic import GenericIE -from .gettr import ( - GettrIE, - GettrStreamingIE, -) -from .gfycat import GfycatIE -from .giantbomb import GiantBombIE -from .giga import GigaIE -from .glide import GlideIE -from .globo import ( - GloboIE, - GloboArticleIE, -) -from .go import GoIE -from .godtube 
import GodTubeIE -from .gofile import GofileIE -from .golem import GolemIE -from .goodgame import GoodGameIE -from .googledrive import GoogleDriveIE -from .googlepodcasts import ( - GooglePodcastsIE, - GooglePodcastsFeedIE, -) -from .googlesearch import GoogleSearchIE -from .gopro import GoProIE -from .goshgay import GoshgayIE -from .gotostage import GoToStageIE -from .gputechconf import GPUTechConfIE -from .gronkh import ( - GronkhIE, - GronkhFeedIE, - GronkhVodsIE -) -from .groupon import GrouponIE -from .hbo import HBOIE -from .hearthisat import HearThisAtIE -from .heise import HeiseIE -from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE -from .hentaistigma import HentaiStigmaIE -from .hgtv import HGTVComShowIE -from .hketv import HKETVIE -from .hidive import HiDiveIE -from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE -from .hitrecord import HitRecordIE -from .hotnewhiphop import HotNewHipHopIE -from .hotstar import ( - HotStarIE, - HotStarPrefixIE, - HotStarPlaylistIE, - HotStarSeriesIE, -) -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE -from .hrfensehen import HRFernsehenIE -from .hrti import ( - HRTiIE, - HRTiPlaylistIE, -) -from .hse import ( - HSEShowIE, - HSEProductIE, -) -from .huajiao import HuajiaoIE -from .huya import HuyaLiveIE -from .huffpost import HuffPostIE -from .hungama import ( - HungamaIE, - HungamaSongIE, - HungamaAlbumPlaylistIE, -) -from .hypem import HypemIE -from .icareus import IcareusIE -from .ichinanalive import ( - IchinanaLiveIE, - IchinanaLiveClipIE, -) -from .ign import ( - IGNIE, - IGNVideoIE, - IGNArticleIE, -) -from .iheart import ( - IHeartRadioIE, - IHeartRadioPodcastIE, -) -from .imdb import ( - ImdbIE, - ImdbListIE -) -from .imgur import ( - ImgurIE, - ImgurAlbumIE, - ImgurGalleryIE, -) -from .ina import InaIE -from .inc import IncIE -from .indavideo import IndavideoEmbedIE -from .infoq import InfoQIE -from .instagram import ( - InstagramIE, - InstagramIOSIE, - InstagramUserIE, - InstagramTagIE, - InstagramStoryIE, -) -from .internazionale import InternazionaleIE -from .internetvideoarchive import InternetVideoArchiveIE -from .iprima import ( - IPrimaIE, - IPrimaCNNIE -) -from .iqiyi import ( - IqiyiIE, - IqIE, - IqAlbumIE -) +_LAZY_LOADER = False +if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + with contextlib.suppress(ImportError): + from .lazy_extractors import * # noqa: F403 + from .lazy_extractors import _ALL_CLASSES + _LAZY_LOADER = True -from .itprotv import ( - ITProTVIE, - ITProTVCourseIE -) +if not _LAZY_LOADER: + from ._extractors import * # noqa: F403 + _ALL_CLASSES = [ # noqa: F811 + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' + ] + _ALL_CLASSES.append(GenericIE) # noqa: F405 -from .itv import ( - ITVIE, - ITVBTCCIE, -) -from .ivi import ( - IviIE, - IviCompilationIE -) -from .ivideon import IvideonIE -from .iwara import ( - IwaraIE, - IwaraPlaylistIE, - IwaraUserIE, -) -from .izlesene import IzleseneIE -from .jable import ( - JableIE, - JablePlaylistIE, -) -from .jamendo import ( - JamendoIE, - JamendoAlbumIE, -) -from .jeuxvideo import JeuxVideoIE -from .jove import JoveIE -from .joj import JojIE -from .jwplatform import JWPlatformIE -from .kakao import KakaoIE -from .kaltura import KalturaIE -from .karaoketv import KaraoketvIE -from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE -from .kelbyone import KelbyOneIE -from .ketnet import KetnetIE -from .khanacademy import 
( - KhanAcademyIE, - KhanAcademyUnitIE, -) -from .kickstarter import KickStarterIE -from .kinja import KinjaEmbedIE -from .kinopoisk import KinoPoiskIE -from .konserthusetplay import KonserthusetPlayIE -from .koo import KooIE -from .krasview import KrasViewIE -from .ku6 import Ku6IE -from .kusi import KUSIIE -from .kuwo import ( - KuwoIE, - KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, - KuwoCategoryIE, - KuwoMvIE, -) -from .la7 import ( - LA7IE, - LA7PodcastEpisodeIE, - LA7PodcastIE, -) -from .laola1tv import ( - Laola1TvEmbedIE, - Laola1TvIE, - EHFTVIE, - ITTFIE, -) -from .lastfm import ( - LastFMIE, - LastFMPlaylistIE, - LastFMUserIE, -) -from .lbry import ( - LBRYIE, - LBRYChannelIE, -) -from .lci import LCIIE -from .lcp import ( - LcpPlayIE, - LcpIE, -) -from .lecture2go import Lecture2GoIE -from .lecturio import ( - LecturioIE, - LecturioCourseIE, - LecturioDeCourseIE, -) -from .leeco import ( - LeIE, - LePlaylistIE, - LetvCloudIE, -) -from .lego import LEGOIE -from .lemonde import LemondeIE -from .lenta import LentaIE -from .libraryofcongress import LibraryOfCongressIE -from .libsyn import LibsynIE -from .lifenews import ( - LifeNewsIE, - LifeEmbedIE, -) -from .likee import ( - LikeeIE, - LikeeUserIE -) -from .limelight import ( - LimelightMediaIE, - LimelightChannelIE, - LimelightChannelListIE, -) -from .line import ( - LineLiveIE, - LineLiveChannelIE, -) -from .linkedin import ( - LinkedInIE, - LinkedInLearningIE, - LinkedInLearningCourseIE, -) -from .linuxacademy import LinuxAcademyIE -from .litv import LiTVIE -from .livejournal import LiveJournalIE -from .livestream import ( - LivestreamIE, - LivestreamOriginalIE, - LivestreamShortenerIE, -) -from .lnkgo import ( - LnkGoIE, - LnkIE, -) -from .localnews8 import LocalNews8IE -from .lovehomeporn import LoveHomePornIE -from .lrt import ( - LRTVODIE, - LRTStreamIE -) -from .lynda import ( - LyndaIE, - LyndaCourseIE -) -from .m6 import M6IE -from .magentamusik360 import MagentaMusik360IE -from .mailru import ( - MailRuIE, - MailRuMusicIE, - MailRuMusicSearchIE, -) -from .mainstreaming import MainStreamingIE -from .malltv import MallTVIE -from .mangomolo import ( - MangomoloVideoIE, - MangomoloLiveIE, -) -from .manoto import ( - ManotoTVIE, - ManotoTVShowIE, - ManotoTVLiveIE, -) -from .manyvids import ManyVidsIE -from .maoritv import MaoriTVIE -from .markiza import ( - MarkizaIE, - MarkizaPageIE, -) -from .massengeschmacktv import MassengeschmackTVIE -from .masters import MastersIE -from .matchtv import MatchTVIE -from .mdr import MDRIE -from .medaltv import MedalTVIE -from .mediaite import MediaiteIE -from .mediaklikk import MediaKlikkIE -from .mediaset import ( - MediasetIE, - MediasetShowIE, -) -from .mediasite import ( - MediasiteIE, - MediasiteCatalogIE, - MediasiteNamedCatalogIE, -) -from .medici import MediciIE -from .megaphone import MegaphoneIE -from .meipai import MeipaiIE -from .melonvod import MelonVODIE -from .meta import METAIE -from .metacafe import MetacafeIE -from .metacritic import MetacriticIE -from .mgoon import MgoonIE -from .mgtv import MGTVIE -from .miaopai import MiaoPaiIE -from .microsoftstream import MicrosoftStreamIE -from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyIE, - MicrosoftVirtualAcademyCourseIE, -) -from .mildom import ( - MildomIE, - MildomVodIE, - MildomClipIE, - MildomUserVodIE, -) -from .minds import ( - MindsIE, - MindsChannelIE, - MindsGroupIE, -) -from .ministrygrid import MinistryGridIE -from .minoto import MinotoIE -from .miomio import MioMioIE -from .mirrativ import ( - 
MirrativIE, - MirrativUserIE, -) -from .mit import TechTVMITIE, OCWMITIE -from .mitele import MiTeleIE -from .mixch import ( - MixchIE, - MixchArchiveIE, -) -from .mixcloud import ( - MixcloudIE, - MixcloudUserIE, - MixcloudPlaylistIE, -) -from .mlb import ( - MLBIE, - MLBVideoIE, -) -from .mlssoccer import MLSSoccerIE -from .mnet import MnetIE -from .moevideo import MoeVideoIE -from .mofosex import ( - MofosexIE, - MofosexEmbedIE, -) -from .mojvideo import MojvideoIE -from .morningstar import MorningstarIE -from .motherless import ( - MotherlessIE, - MotherlessGroupIE -) -from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE -from .moviepilot import MoviepilotIE -from .moviezine import MoviezineIE -from .movingimage import MovingImageIE -from .msn import MSNIE -from .mtv import ( - MTVIE, - MTVVideoIE, - MTVServicesEmbeddedIE, - MTVDEIE, - MTVJapanIE, - MTVItaliaIE, - MTVItaliaProgrammaIE, -) -from .muenchentv import MuenchenTVIE -from .murrtube import MurrtubeIE, MurrtubeUserIE -from .musescore import MuseScoreIE -from .musicdex import ( - MusicdexSongIE, - MusicdexAlbumIE, - MusicdexArtistIE, - MusicdexPlaylistIE, -) -from .mwave import MwaveIE, MwaveMeetGreetIE -from .mxplayer import ( - MxplayerIE, - MxplayerShowIE, -) -from .mychannels import MyChannelsIE -from .myspace import MySpaceIE, MySpaceAlbumIE -from .myspass import MySpassIE -from .myvi import ( - MyviIE, - MyviEmbedIE, -) -from .myvideoge import MyVideoGeIE -from .myvidster import MyVidsterIE -from .n1 import ( - N1InfoAssetIE, - N1InfoIIE, -) -from .nate import ( - NateIE, - NateProgramIE, -) -from .nationalgeographic import ( - NationalGeographicVideoIE, - NationalGeographicTVIE, -) -from .naver import ( - NaverIE, - NaverLiveIE, -) -from .nba import ( - NBAWatchEmbedIE, - NBAWatchIE, - NBAWatchCollectionIE, - NBAEmbedIE, - NBAIE, - NBAChannelIE, -) -from .nbc import ( - NBCIE, - NBCNewsIE, - NBCOlympicsIE, - NBCOlympicsStreamIE, - NBCSportsIE, - NBCSportsStreamIE, - NBCSportsVPlayerIE, -) -from .ndr import ( - NDRIE, - NJoyIE, - NDREmbedBaseIE, - NDREmbedIE, - NJoyEmbedIE, -) -from .ndtv import NDTVIE -from .nebula import ( - NebulaIE, - NebulaSubscriptionsIE, - NebulaChannelIE, -) -from .nerdcubed import NerdCubedFeedIE -from .netzkino import NetzkinoIE -from .neteasemusic import ( - NetEaseMusicIE, - NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, - NetEaseMusicListIE, - NetEaseMusicMvIE, - NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, -) -from .newgrounds import ( - NewgroundsIE, - NewgroundsPlaylistIE, - NewgroundsUserIE, -) -from .newstube import NewstubeIE -from .newsy import NewsyIE -from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, - AppleDailyIE, - NextTVIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nfb import NFBIE -from .nfhsnetwork import NFHSNetworkIE -from .nfl import ( - NFLIE, - NFLArticleIE, -) -from .nhk import ( - NhkVodIE, - NhkVodProgramIE, - NhkForSchoolBangumiIE, - NhkForSchoolSubjectIE, - NhkForSchoolProgramListIE, -) -from .nhl import NHLIE -from .nick import ( - NickIE, - NickBrIE, - NickDeIE, - NickNightIE, - NickRuIE, -) -from .niconico import ( - NiconicoIE, - NiconicoPlaylistIE, - NiconicoUserIE, - NiconicoSeriesIE, - NiconicoHistoryIE, - NicovideoSearchDateIE, - NicovideoSearchIE, - NicovideoSearchURLIE, - NicovideoTagURLIE, -) -from .ninecninemedia import ( - NineCNineMediaIE, - CPTwentyFourIE, -) -from .ninegag import NineGagIE -from .ninenow import NineNowIE -from .nintendo import NintendoIE -from .nitter import NitterIE -from 
.njpwworld import NJPWWorldIE -from .nobelprize import NobelPrizeIE -from .nonktube import NonkTubeIE -from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE -from .nova import ( - NovaEmbedIE, - NovaIE, -) -from .novaplay import NovaPlayIE -from .nowness import ( - NownessIE, - NownessPlaylistIE, - NownessSeriesIE, -) -from .noz import NozIE -from .npo import ( - AndereTijdenIE, - NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - HetKlokhuisIE, - VPROIE, - WNLIE, -) -from .npr import NprIE -from .nrk import ( - NRKIE, - NRKPlaylistIE, - NRKSkoleIE, - NRKTVIE, - NRKTVDirekteIE, - NRKRadioPodkastIE, - NRKTVEpisodeIE, - NRKTVEpisodesIE, - NRKTVSeasonIE, - NRKTVSeriesIE, -) -from .nrl import NRLTVIE -from .ntvcojp import NTVCoJpCUIE -from .ntvde import NTVDeIE -from .ntvru import NTVRuIE -from .nytimes import ( - NYTimesIE, - NYTimesArticleIE, - NYTimesCookingIE, -) -from .nuvid import NuvidIE -from .nzherald import NZHeraldIE -from .nzz import NZZIE -from .odatv import OdaTVIE -from .odnoklassniki import OdnoklassnikiIE -from .oktoberfesttv import OktoberfestTVIE -from .olympics import OlympicsReplayIE -from .on24 import On24IE -from .ondemandkorea import OnDemandKoreaIE -from .onefootball import OneFootballIE -from .onet import ( - OnetIE, - OnetChannelIE, - OnetMVPIE, - OnetPlIE, -) -from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) -from .opencast import ( - OpencastIE, - OpencastPlaylistIE, -) -from .openrec import ( - OpenRecIE, - OpenRecCaptureIE, - OpenRecMovieIE, -) -from .ora import OraTVIE -from .orf import ( - ORFTVthekIE, - ORFFM4IE, - ORFFM4StoryIE, - ORFOE1IE, - ORFOE3IE, - ORFNOEIE, - ORFWIEIE, - ORFBGLIE, - ORFOOEIE, - ORFSTMIE, - ORFKTNIE, - ORFSBGIE, - ORFTIRIE, - ORFVBGIE, - ORFIPTVIE, -) -from .outsidetv import OutsideTVIE -from .packtpub import ( - PacktPubIE, - PacktPubCourseIE, -) -from .palcomp3 import ( - PalcoMP3IE, - PalcoMP3ArtistIE, - PalcoMP3VideoIE, -) -from .pandoratv import PandoraTVIE -from .panopto import ( - PanoptoIE, - PanoptoListIE, - PanoptoPlaylistIE -) -from .paramountplus import ( - ParamountPlusIE, - ParamountPlusSeriesIE, -) -from .parliamentliveuk import ParliamentLiveUKIE -from .parlview import ParlviewIE -from .patreon import ( - PatreonIE, - PatreonUserIE -) -from .pbs import PBSIE -from .pearvideo import PearVideoIE -from .peekvids import PeekVidsIE, PlayVidsIE -from .peertube import ( - PeerTubeIE, - PeerTubePlaylistIE, -) -from .peertv import PeerTVIE -from .peloton import ( - PelotonIE, - PelotonLiveIE -) -from .people import PeopleIE -from .performgroup import PerformGroupIE -from .periscope import ( - PeriscopeIE, - PeriscopeUserIE, -) -from .philharmoniedeparis import PhilharmonieDeParisIE -from .phoenix import PhoenixIE -from .photobucket import PhotobucketIE -from .piapro import PiaproIE -from .picarto import ( - PicartoIE, - PicartoVodIE, -) -from .piksel import PikselIE -from .pinkbike import PinkbikeIE -from .pinterest import ( - PinterestIE, - PinterestCollectionIE, -) -from .pixivsketch import ( - PixivSketchIE, - PixivSketchUserIE, -) -from .pladform import PladformIE -from .planetmarathi import PlanetMarathiIE -from .platzi import ( - PlatziIE, - PlatziCourseIE, -) -from .playfm import PlayFMIE -from .playplustv import PlayPlusTVIE -from .plays import PlaysTVIE -from .playstuff import PlayStuffIE -from .playtvak import PlaytvakIE -from .playvid import PlayvidIE -from 
-from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
-from .pluralsight import (
-    PluralsightIE,
-    PluralsightCourseIE,
-)
-from .podchaser import PodchaserIE
-from .podomatic import PodomaticIE
-from .pokemon import (
-    PokemonIE,
-    PokemonWatchIE,
-    PokemonSoundLibraryIE,
-)
-from .pokergo import (
-    PokerGoIE,
-    PokerGoCollectionIE,
-)
-from .polsatgo import PolsatGoIE
-from .polskieradio import (
-    PolskieRadioIE,
-    PolskieRadioCategoryIE,
-    PolskieRadioPlayerIE,
-    PolskieRadioPodcastIE,
-    PolskieRadioPodcastListIE,
-    PolskieRadioRadioKierowcowIE,
-)
-from .popcorntimes import PopcorntimesIE
-from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
-from .porncom import PornComIE
-from .pornflip import PornFlipIE
-from .pornhd import PornHdIE
-from .pornhub import (
-    PornHubIE,
-    PornHubUserIE,
-    PornHubPlaylistIE,
-    PornHubPagedVideoListIE,
-    PornHubUserVideosUploadIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .pornez import PornezIE
-from .puhutv import (
-    PuhuTVIE,
-    PuhuTVSerieIE,
-)
-from .presstv import PressTVIE
-from .projectveritas import ProjectVeritasIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .prx import (
-    PRXStoryIE,
-    PRXSeriesIE,
-    PRXAccountIE,
-    PRXStoriesSearchIE,
-    PRXSeriesSearchIE
-)
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
-    QQMusicIE,
-    QQMusicSingerIE,
-    QQMusicAlbumIE,
-    QQMusicToplistIE,
-    QQMusicPlaylistIE,
-)
-from .r7 import (
-    R7IE,
-    R7ArticleIE,
-)
-from .radiko import RadikoIE, RadikoRadioIE
-from .radiocanada import (
-    RadioCanadaIE,
-    RadioCanadaAudioVideoIE,
-)
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .radiozet import RadioZetPodcastIE
-from .radiokapital import (
-    RadioKapitalIE,
-    RadioKapitalShowIE,
-)
-from .radlive import (
-    RadLiveIE,
-    RadLiveChannelIE,
-    RadLiveSeasonIE,
-)
-from .rai import (
-    RaiPlayIE,
-    RaiPlayLiveIE,
-    RaiPlayPlaylistIE,
-    RaiPlaySoundIE,
-    RaiPlaySoundLiveIE,
-    RaiPlaySoundPlaylistIE,
-    RaiIE,
-)
-from .raywenderlich import (
-    RayWenderlichIE,
-    RayWenderlichCourseIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rcs import (
-    RCSIE,
-    RCSEmbedsIE,
-    RCSVariousIE,
-)
-from .rcti import (
-    RCTIPlusIE,
-    RCTIPlusSeriesIE,
-    RCTIPlusTVIE,
-)
-from .rds import RDSIE
-from .redbulltv import (
-    RedBullTVIE,
-    RedBullEmbedIE,
-    RedBullTVRrnContentIE,
-    RedBullIE,
-)
-from .reddit import RedditIE
-from .redgifs import (
-    RedGifsIE,
-    RedGifsSearchIE,
-    RedGifsUserIE,
-)
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .rentv import (
-    RENTVIE,
-    RENTVArticleIE,
-)
-from .restudy import RestudyIE
-from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
-from .rice import RICEIE
-from .rmcdecouverte import RMCDecouverteIE
-from .rockstargames import RockstarGamesIE
-from .rokfin import (
-    RokfinIE,
-    RokfinStackIE,
-    RokfinChannelIE,
-    RokfinSearchIE,
-)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
-from .rottentomatoes import RottenTomatoesIE
-from .rozhlas import RozhlasIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import (
-    RTL2IE,
-    RTL2YouIE,
-    RTL2YouSeriesIE,
-)
-from .rtnews import (
-    RTNewsIE,
-    RTDocumentryIE,
-    RTDocumentryPlaylistIE,
-    RuptlyIE,
-)
-from .rtp import RTPIE
-from .rtrfm import RTRFMIE
-from .rts import RTSIE
-from .rtve import (
-    RTVEALaCartaIE,
-    RTVEAudioIE,
-    RTVELiveIE,
-    RTVEInfantilIE,
-    RTVETelevisionIE,
-)
-from .rtvnh import RTVNHIE
-from .rtvs import RTVSIE
-from .ruhd import RUHDIE
-from .rule34video import Rule34VideoIE
-from .rumble import (
-    RumbleEmbedIE,
-    RumbleChannelIE,
-)
-from .rutube import (
-    RutubeIE,
-    RutubeChannelIE,
-    RutubeEmbedIE,
-    RutubeMovieIE,
-    RutubePersonIE,
-    RutubePlaylistIE,
-    RutubeTagsIE,
-)
-from .glomex import (
-    GlomexIE,
-    GlomexEmbedIE,
-)
-from .megatvcom import (
-    MegaTVComIE,
-    MegaTVComEmbedIE,
-)
-from .ant1newsgr import (
-    Ant1NewsGrWatchIE,
-    Ant1NewsGrArticleIE,
-    Ant1NewsGrEmbedIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .ruv import (
-    RuvIE,
-    RuvSpilaIE
-)
-from .safari import (
-    SafariIE,
-    SafariApiIE,
-    SafariCourseIE,
-)
-from .saitosan import SaitosanIE
-from .samplefocus import SampleFocusIE
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import (
-    ScrippsNetworksWatchIE,
-    ScrippsNetworksIE,
-)
-from .scte import (
-    SCTEIE,
-    SCTECourseIE,
-)
-from .seeker import SeekerIE
-from .senategov import SenateISVPIE, SenateGovIE
-from .sendtonews import SendtoNewsIE
-from .servus import ServusIE
-from .sevenplus import SevenPlusIE
-from .sexu import SexuIE
-from .seznamzpravy import (
-    SeznamZpravyIE,
-    SeznamZpravyArticleIE,
-)
-from .shahid import (
-    ShahidIE,
-    ShahidShowIE,
-)
-from .shared import (
-    SharedIE,
-    VivoIE,
-)
-from .shemaroome import ShemarooMeIE
-from .showroomlive import ShowRoomLiveIE
-from .simplecast import (
-    SimplecastIE,
-    SimplecastEpisodeIE,
-    SimplecastPodcastIE,
-)
-from .sina import SinaIE
-from .sixplay import SixPlayIE
-from .skeb import SkebIE
-from .skyit import (
-    SkyItPlayerIE,
-    SkyItVideoIE,
-    SkyItVideoLiveIE,
-    SkyItIE,
-    SkyItAcademyIE,
-    SkyItArteIE,
-    CieloTVItIE,
-    TV8ItIE,
-)
-from .skylinewebcams import SkylineWebcamsIE
-from .skynewsarabia import (
-    SkyNewsArabiaIE,
-    SkyNewsArabiaArticleIE,
-)
-from .skynewsau import SkyNewsAUIE
-from .sky import (
-    SkyNewsIE,
-    SkyNewsStoryIE,
-    SkySportsIE,
-    SkySportsNewsIE,
-)
-from .slideshare import SlideshareIE
-from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .sonyliv import (
-    SonyLIVIE,
-    SonyLIVSeriesIE,
-)
-from .soundcloud import (
-    SoundcloudEmbedIE,
-    SoundcloudIE,
-    SoundcloudSetIE,
-    SoundcloudRelatedIE,
-    SoundcloudUserIE,
-    SoundcloudTrackStationIE,
-    SoundcloudPlaylistIE,
-    SoundcloudSearchIE,
-)
-from .soundgasm import (
-    SoundgasmIE,
-    SoundgasmProfileIE
-)
-from .southpark import (
-    SouthParkIE,
-    SouthParkDeIE,
-    SouthParkDkIE,
-    SouthParkEsIE,
-    SouthParkNlIE
-)
-from .sovietscloset import (
-    SovietsClosetIE,
-    SovietsClosetPlaylistIE
-)
-from .spankbang import (
-    SpankBangIE,
-    SpankBangPlaylistIE,
-)
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
-from .spike import (
-    BellatorIE,
-    ParamountNetworkIE,
-)
-from .stitcher import (
-    StitcherIE,
-    StitcherShowIE,
-)
-from .sport5 import Sport5IE
-from .sportbox import SportBoxIE
-from .sportdeutschland import SportDeutschlandIE
-from .spotify import (
-    SpotifyIE,
-    SpotifyShowIE,
-)
-from .spreaker import (
-    SpreakerIE,
-    SpreakerPageIE,
-    SpreakerShowIE,
-    SpreakerShowPageIE,
-)
-from .springboardplatform import SpringboardPlatformIE
-from .sprout import SproutIE
-from .srgssr import (
-    SRGSSRIE,
-    SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .startv import StarTVIE
-from .steam import SteamIE
-from .storyfire import (
-    StoryFireIE,
-    StoryFireUserIE,
-    StoryFireSeriesIE,
-)
-from .streamable import StreamableIE
-from .streamanity import StreamanityIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streamff import StreamFFIE
-from .streetvoice import StreetVoiceIE
-from .stretchinternet import StretchInternetIE
-from .stripchat import StripchatIE
-from .stv import STVPlayerIE
-from .sunporno import SunPornoIE
-from .sverigesradio import (
-    SverigesRadioEpisodeIE,
-    SverigesRadioPublicationIE,
-)
-from .svt import (
-    SVTIE,
-    SVTPageIE,
-    SVTPlayIE,
-    SVTSeriesIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tass import TassIE
-from .tbs import TBSIE
-from .tdslifeway import TDSLifewayIE
-from .teachable import (
-    TeachableIE,
-    TeachableCourseIE,
-)
-from .teachertube import (
-    TeacherTubeIE,
-    TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
-from .ted import (
-    TedEmbedIE,
-    TedPlaylistIE,
-    TedSeriesIE,
-    TedTalkIE,
-)
-from .tele5 import Tele5IE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telegram import TelegramEmbedIE
-from .telemb import TeleMBIE
-from .telemundo import TelemundoIE
-from .telequebec import (
-    TeleQuebecIE,
-    TeleQuebecSquatIE,
-    TeleQuebecEmissionIE,
-    TeleQuebecLiveIE,
-    TeleQuebecVideoIE,
-)
-from .teletask import TeleTaskIE
-from .telewebion import TelewebionIE
-from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .tfo import TFOIE
-from .theintercept import TheInterceptIE
-from .theplatform import (
-    ThePlatformIE,
-    ThePlatformFeedIE,
-)
-from .thestar import TheStarIE
-from .thesun import TheSunIE
-from .theta import (
-    ThetaVideoIE,
-    ThetaStreamIE,
-)
-from .theweatherchannel import TheWeatherChannelIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .thisoldhouse import ThisOldHouseIE
-from .threespeak import (
-    ThreeSpeakIE,
-    ThreeSpeakUserIE,
-)
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import (
-    TikTokIE,
-    TikTokUserIE,
-    TikTokSoundIE,
-    TikTokEffectIE,
-    TikTokTagIE,
-    TikTokVMIE,
-    DouyinIE,
-)
-from .tinypic import TinyPicIE
-from .tmz import TMZIE
-from .tnaflix import (
-    TNAFlixNetworkEmbedIE,
-    TNAFlixIE,
-    EMPFlixIE,
-    MovieFapIE,
-)
-from .toggle import (
-    ToggleIE,
-    MeWatchIE,
-)
-from .toggo import (
-    ToggoIE,
-)
-from .tokentube import (
-    TokentubeIE,
-    TokentubeChannelIE
-)
-from .tonline import TOnlineIE
-from .toongoggles import ToonGogglesIE
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trovo import (
-    TrovoIE,
-    TrovoVodIE,
-    TrovoChannelVodIE,
-    TrovoChannelClipIE,
-)
-from .trueid import TrueIDIE
-from .trunews import TruNewsIE
-from .trutv import TruTVIE
-from .tube8 import Tube8IE
-from .tubitv import (
-    TubiTvIE,
-    TubiTvShowIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
-    TuneInClipIE,
-    TuneInStationIE,
-    TuneInProgramIE,
-    TuneInTopicIE,
-    TuneInShortenerIE,
-)
-from .tunepk import TunePkIE
-from .turbo import TurboIE
-from .tv2 import (
-    TV2IE,
-    TV2ArticleIE,
-    KatsomoIE,
-    MTVUutisetArticleIE,
-)
-from .tv2dk import (
-    TV2DKIE,
-    TV2DKBornholmPlayIE,
-)
-from .tv2hu import (
-    TV2HuIE,
-    TV2HuSeriesIE,
-)
-from .tv4 import TV4IE
-from .tv5mondeplus import TV5MondePlusIE
-from .tv5unis import (
-    TV5UnisVideoIE,
-    TV5UnisIE,
-)
-from .tva import (
-    TVAIE,
-    QubIE,
-)
-from .tvanouvelles import (
-    TVANouvellesIE,
-    TVANouvellesArticleIE,
-)
-from .tvc import (
-    TVCIE,
-    TVCArticleIE,
-)
-from .tver import TVerIE
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
-from .tvnoe import TVNoeIE
-from .tvnow import (
-    TVNowIE,
-    TVNowFilmIE,
-    TVNowNewIE,
-    TVNowSeasonIE,
-    TVNowAnnualIE,
-    TVNowShowIE,
-)
-from .tvopengr import (
-    TVOpenGrWatchIE,
-    TVOpenGrEmbedIE,
-)
-from .tvp import (
-    TVPEmbedIE,
-    TVPIE,
-    TVPStreamIE,
-    TVPWebsiteIE,
-)
-from .tvplay import (
-    TVPlayIE,
-    ViafreeIE,
-    TVPlayHomeIE,
-)
-from .tvplayer import TVPlayerIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import (
-    TwitCastingIE,
-    TwitCastingLiveIE,
-    TwitCastingUserIE,
-)
-from .twitch import (
-    TwitchVodIE,
-    TwitchCollectionIE,
-    TwitchVideosIE,
-    TwitchVideosClipsIE,
-    TwitchVideosCollectionsIE,
-    TwitchStreamIE,
-    TwitchClipsIE,
-)
-from .twitter import (
-    TwitterCardIE,
-    TwitterIE,
-    TwitterAmplifyIE,
-    TwitterBroadcastIE,
-    TwitterShortenerIE,
-)
-from .udemy import (
-    UdemyIE,
-    UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .ufctv import (
-    UFCTVIE,
-    UFCArabiaIE,
-)
-from .ukcolumn import UkColumnIE
-from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
-    DLiveVODIE,
-    DLiveStreamIE,
-)
-from .drooble import DroobleIE
-from .umg import UMGDeIE
-from .unistra import UnistraIE
-from .unity import UnityIE
-from .uol import UOLIE
-from .uplynk import (
-    UplynkIE,
-    UplynkPreplayIE,
-)
-from .urort import UrortIE
-from .urplay import URPlayIE
-from .usanetwork import USANetworkIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import (
-    UstudioIE,
-    UstudioEmbedIE,
-)
-from .utreon import UtreonIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veo import VeoIE
-from .veoh import VeohIE
-from .vesti import VestiIE
-from .vevo import (
-    VevoIE,
-    VevoPlaylistIE,
-)
-from .vgtv import (
-    BTArticleIE,
-    BTVestlendingenIE,
-    VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
-    ViceIE,
-    ViceArticleIE,
-    ViceShowIE,
-)
-from .vidbit import VidbitIE
-from .viddler import ViddlerIE
-from .videa import VideaIE
-from .videocampus_sachsen import VideocampusSachsenIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomore import (
-    VideomoreIE,
-    VideomoreVideoIE,
-    VideomoreSeasonIE,
-)
-from .videopress import VideoPressIE
-from .vidio import (
-    VidioIE,
-    VidioPremierIE,
-    VidioLiveIE
-)
-from .vidlii import VidLiiIE
-from .vier import VierIE, VierVideosIE
-from .viewlift import (
-    ViewLiftIE,
-    ViewLiftEmbedIE,
-)
-from .viidea import ViideaIE
-from .vimeo import (
-    VimeoIE,
-    VimeoAlbumIE,
-    VimeoChannelIE,
-    VimeoGroupsIE,
-    VimeoLikesIE,
-    VimeoOndemandIE,
-    VimeoReviewIE,
-    VimeoUserIE,
-    VimeoWatchLaterIE,
-    VHXEmbedIE,
-)
-from .vimm import (
-    VimmIE,
-    VimmRecordingIE,
-)
-from .vimple import VimpleIE
-from .vine import (
-    VineIE,
-    VineUserIE,
-)
-from .viki import (
-    VikiIE,
-    VikiChannelIE,
-)
-from .viqeo import ViqeoIE
-from .viu import (
-    ViuIE,
-    ViuPlaylistIE,
-    ViuOTTIE,
-)
-from .vk import (
-    VKIE,
-    VKUserVideosIE,
-    VKWallPostIE,
-)
-from .vlive import (
-    VLiveIE,
-    VLivePostIE,
-    VLiveChannelIE,
-)
-from .vodlocker import VodlockerIE
-from .vodpl import VODPlIE
-from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
-from .voicy import (
-    VoicyIE,
-    VoicyChannelIE,
-)
-from .voot import (
-    VootIE,
-    VootSeriesIE,
-)
-from .voxmedia import (
-    VoxMediaVolumeIE,
-    VoxMediaIE,
-)
-from .vrt import VRTIE
-from .vrak import VrakIE
-from .vrv import (
-    VRVIE,
-    VRVSeriesIE,
-)
-from .vshare import VShareIE
-from .vtm import VTMIE
-from .medialaan import MedialaanIE
-from .vuclip import VuClipIE
-from .vupload import VuploadIE
-from .vvvvid import (
-    VVVVIDIE,
-    VVVVIDShowIE,
-)
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
-from .walla import WallaIE
-from .washingtonpost import (
-    WashingtonPostIE,
-    WashingtonPostArticleIE,
-)
-from .wasdtv import (
-    WASDTVStreamIE,
-    WASDTVRecordIE,
-    WASDTVClipIE,
-)
-from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
-from .wdr import (
-    WDRIE,
-    WDRPageIE,
-    WDRElefantIE,
-    WDRMobileIE,
-)
-from .webcaster import (
-    WebcasterIE,
-    WebcasterFeedIE,
-)
-from .webofstories import (
-    WebOfStoriesIE,
-    WebOfStoriesPlaylistIE,
-)
-from .weibo import (
-    WeiboIE,
-    WeiboMobileIE
-)
-from .weiqitv import WeiqiTVIE
-from .willow import WillowIE
-from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
-from .wistia import (
-    WistiaIE,
-    WistiaPlaylistIE,
-)
-from .worldstarhiphop import WorldStarHipHopIE
-from .wppilot import (
-    WPPilotIE,
-    WPPilotChannelsIE,
-)
-from .wsj import (
-    WSJIE,
-    WSJArticleIE,
-)
-from .wwe import WWEIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
-    XHamsterIE,
-    XHamsterEmbedIE,
-    XHamsterUserIE,
-)
-from .xiami import (
-    XiamiSongIE,
-    XiamiAlbumIE,
-    XiamiArtistIE,
-    XiamiCollectionIE
-)
-from .ximalaya import (
-    XimalayaIE,
-    XimalayaAlbumIE
-)
-from .xinpianchang import XinpianchangIE
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
-    YahooIE,
-    YahooSearchIE,
-    YahooGyaOPlayerIE,
-    YahooGyaOIE,
-    YahooJapanNewsIE,
-)
-from .yandexdisk import YandexDiskIE
-from .yandexmusic import (
-    YandexMusicTrackIE,
-    YandexMusicAlbumIE,
-    YandexMusicPlaylistIE,
-    YandexMusicArtistTracksIE,
-    YandexMusicArtistAlbumsIE,
-)
-from .yandexvideo import (
-    YandexVideoIE,
-    YandexVideoPreviewIE,
-    ZenYandexIE,
-    ZenYandexChannelIE,
-)
-from .yapfiles import YapFilesIE
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import (
-    YoukuIE,
-    YoukuShowIE,
-)
-from .younow import (
-    YouNowLiveIE,
-    YouNowChannelIE,
-    YouNowMomentIE,
-)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
-    YoutubeIE,
-    YoutubeClipIE,
-    YoutubeFavouritesIE,
-    YoutubeNotificationsIE,
-    YoutubeHistoryIE,
-    YoutubeTabIE,
-    YoutubeLivestreamEmbedIE,
-    YoutubePlaylistIE,
-    YoutubeRecommendedIE,
-    YoutubeSearchDateIE,
-    YoutubeSearchIE,
-    YoutubeSearchURLIE,
-    YoutubeMusicSearchURLIE,
-    YoutubeSubscriptionsIE,
-    YoutubeStoriesIE,
-    YoutubeTruncatedIDIE,
-    YoutubeTruncatedURLIE,
-    YoutubeYtBeIE,
-    YoutubeYtUserIE,
-    YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zattoo import (
-    BBVTVIE,
-    EinsUndEinsTVIE,
-    EWETVIE,
-    GlattvisionTVIE,
-    MNetTVIE,
-    NetPlusIE,
-    OsnatelTVIE,
-    QuantumTVIE,
-    SaltTVIE,
-    SAKTVIE,
-    VTXTVIE,
-    WalyTVIE,
-    ZattooIE,
-    ZattooLiveIE,
-    ZattooMoviesIE,
-    ZattooRecordingsIE,
-)
-from .zdf import ZDFIE, ZDFChannelIE
-from .zee5 import (
-    Zee5IE,
-    Zee5SeriesIE,
-)
-from .zhihu import ZhihuIE
-from .zingmp3 import (
-    ZingMp3IE,
-    ZingMp3AlbumIE,
-    ZingMp3ChartHomeIE,
-    ZingMp3WeekChartIE,
-    ZingMp3ChartMusicVideoIE,
-    ZingMp3UserIE,
-)
-from .zoom import ZoomIE
-from .zype import ZypeIE
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index de45f9298..5b34f3bff 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -1,18 +1,18 @@
 import json
 import re
+import urllib.parse
 
 from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
     compat_str,
     compat_urllib_parse_unquote,
-    compat_urllib_parse_unquote_plus,
 )
 from ..utils import (
+    ExtractorError,
     clean_html,
     determine_ext,
     error_to_compat_str,
-    ExtractorError,
     float_or_none,
     get_element_by_id,
     get_first,
@@ -467,7 +467,7 @@ class FacebookIE(InfoExtractor):
             dash_manifest = video.get('dash_manifest')
             if dash_manifest:
                 formats.extend(self._parse_mpd_formats(
-                    compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
 
         def process_formats(formats):
             # Downloads with browser's User-Agent are rate limited. Working around
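
Note: the facebook.py hunk above swaps the compat_urllib_parse_unquote_plus shim for the stdlib urllib.parse.unquote_plus. A minimal illustration of why unquote_plus is needed before the manifest can be parsed as XML (toy string, not real Facebook data):

    import urllib.parse
    import xml.etree.ElementTree as ET

    # Facebook delivers the DASH manifest percent-encoded, with '+' for spaces.
    encoded = '%3CMPD+xmlns%3D%22urn%3Ampeg%3Adash%3Aschema%3Ampd%3A2011%22%3E%3C%2FMPD%3E'
    mpd = ET.fromstring(urllib.parse.unquote_plus(encoded))
    print(mpd.tag)  # -> '{urn:mpeg:dash:schema:mpd:2011}MPD'
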
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 225677b00..3501c4cf6 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -1,16 +1,13 @@
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-)
+from ..compat import compat_parse_qs
 from ..dependencies import websockets
 from ..utils import (
     ExtractorError,
     WebSocketsWrapper,
     js_to_json,
     sanitized_Request,
-    std_headers,
     traverse_obj,
     update_url_query,
     urlencode_postdata,
@@ -81,7 +78,7 @@ class FC2IE(InfoExtractor):
         webpage = None
         if not url.startswith('fc2:'):
             webpage = self._download_webpage(url, video_id)
-            self._downloader.cookiejar.clear_session_cookies()  # must clear
+            self.cookiejar.clear_session_cookies()  # must clear
             self._login()
 
         title, thumbnail, description = None, None, None
@@ -207,10 +204,10 @@ class FC2LiveIE(InfoExtractor):
             'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
             'Origin': 'https://live.fc2.com',
             'Accept': '*/*',
-            'User-Agent': std_headers['User-Agent'],
+            'User-Agent': self.get_param('http_headers')['User-Agent'],
         })
 
-        self.write_debug('[debug] Sending HLS server request')
+        self.write_debug('Sending HLS server request')
 
         while True:
             recv = ws.recv()
@@ -232,13 +229,10 @@ class FC2LiveIE(InfoExtractor):
             if not data or not isinstance(data, dict):
                 continue
             if data.get('name') == '_response_' and data.get('id') == 1:
-                self.write_debug('[debug] Goodbye.')
+                self.write_debug('Goodbye')
                 playlist_data = data
                 break
-            elif self._downloader.params.get('verbose', False):
-                if len(recv) > 100:
-                    recv = recv[:100] + '...'
-                self.to_screen('[debug] Server said: %s' % recv)
+            self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))
 
         if not playlist_data:
             raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
index 552ecd43a..9f60a6b1f 100644
--- a/yt_dlp/extractor/flickr.py
+++ b/yt_dlp/extractor/flickr.py
@@ -94,7 +94,7 @@ class FlickrIE(InfoExtractor):
         owner = video_info.get('owner', {})
         uploader_id = owner.get('nsid')
         uploader_path = owner.get('path_alias') or uploader_id
-        uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/')
+        uploader_url = format_field(uploader_path, None, 'https://www.flickr.com/photos/%s/')
 
         return {
             'id': video_id,
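
Note: the flickr.py change above (and the matching ones in instagram.py and iqiyi.py further down) tracks a signature change where format_field's template became the third positional argument instead of a keyword. A simplified model of the helper's semantics, for orientation only (the real yt_dlp.utils.format_field has more parameters):

    def format_field(obj, field=None, template='%s', default=''):
        # With field=None the value itself is formatted; None yields the default.
        value = obj if field is None else obj.get(field)
        return template % value if value is not None else default

    print(format_field('someuser', None, 'https://www.flickr.com/photos/%s/'))
    # -> 'https://www.flickr.com/photos/someuser/'
    print(format_field(None, None, 'Post by %s'))  # -> ''
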
diff --git a/yt_dlp/extractor/fourzerostudio.py b/yt_dlp/extractor/fourzerostudio.py
new file mode 100644
index 000000000..e1804e39e
--- /dev/null
+++ b/yt_dlp/extractor/fourzerostudio.py
@@ -0,0 +1,107 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, unified_timestamp
+
+
+class FourZeroStudioArchiveIE(InfoExtractor):
+    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
+    IE_NAME = '0000studio:archive'
+    _TESTS = [{
+        'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
+        'info_dict': {
+            'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
+            'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
+            'timestamp': 1653802534,
+            'release_timestamp': 1653796604,
+            'thumbnails': 'count:1',
+            'comments': 'count:7',
+            'uploader': '『中崎雄心』の執務室。',
+            'uploader_id': 'mumeijiten',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+        webpage = self._download_webpage(url, video_id)
+        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+        pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False)
+        uploader_internal_id = traverse_obj(nuxt_data, (
+            'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
+
+        formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': pcb.get('title'),
+            'age_limit': 18 if pcb.get('isAdult') else None,
+            'timestamp': unified_timestamp(pcb.get('finishTime')),
+            'release_timestamp': unified_timestamp(pcb.get('createdAt')),
+            'thumbnails': [{
+                'url': pcb['thumbnailUrl'],
+                'ext': 'png',
+            }] if pcb.get('thumbnailUrl') else None,
+            'formats': formats,
+            'subtitles': subs,
+            'comments': [{
+                'author': c.get('username'),
+                'author_id': c.get('postedUserId'),
+                'author_thumbnail': c.get('userThumbnailUrl'),
+                'id': c.get('id'),
+                'text': c.get('body'),
+                'timestamp': unified_timestamp(c.get('createdAt')),
+                'like_count': c.get('likeCount'),
+                'is_favorited': c.get('isLikedByOwner'),
+                'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
+            } for c in traverse_obj(nuxt_data, (
+                'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
+            'uploader_id': uploader_id,
+            'uploader': traverse_obj(nuxt_data, (
+                'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+        }
+
+
+class FourZeroStudioClipIE(InfoExtractor):
+    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
+    IE_NAME = '0000studio:clip'
+    _TESTS = [{
+        'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+        'info_dict': {
+            'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+            'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
+            'timestamp': 1652109105,
+            'like_count': 1,
+            'uploader': 'ソエジマケイタ',
+            'uploader_id': 'soeji',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+        webpage = self._download_webpage(url, video_id)
+        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+        clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False)
+
+        info = next((
+            m for m in self._parse_html5_media_entries(url, webpage, video_id)
+            if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
+        ), None)
+        if not info:
+            self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
+            info = {
+                'formats': [{
+                    'ext': 'mp4',
+                    'url': url,
+                } for url in clip_info.get('mediaFiles') or [] if url],
+            }
+        return {
+            **info,
+            'id': video_id,
+            'title': clip_info.get('clipComment'),
+            'timestamp': unified_timestamp(clip_info.get('createdAt')),
+            'like_count': clip_info.get('likeCount'),
+            'uploader_id': uploader_id,
+            'uploader': traverse_obj(nuxt_data, (
+                'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+        }
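
Note: the new 0000.studio extractor above picks typed records out of the Nuxt SSR payload with traverse_obj's callable path elements. A toy demonstration of that lookup pattern (fabricated data, not real site output):

    from yt_dlp.utils import traverse_obj

    nuxt_data = {'ssrRefs': [
        {'__typename': 'PublicUser', 'id': 'u1', 'username': 'example'},
        {'__typename': 'PublicCreatorBroadcast', 'title': 'demo'},
    ]}

    # A callable path element receives (key, value) and keeps matching items;
    # get_all=False returns the first match instead of a list of matches.
    pcb = traverse_obj(
        nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'),
        get_all=False)
    print(pcb['title'])  # -> 'demo'
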
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index 4abc2cfd0..b285464ec 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -31,7 +31,7 @@ class FoxgayIE(InfoExtractor):
         description = get_element_by_id('inf_tit', webpage)
 
         # The default user-agent with foxgay cookies leads to pages without videos
-        self._downloader.cookiejar.clear('.foxgay.com')
+        self.cookiejar.clear('.foxgay.com')
 
         # Find the URL for the iFrame which contains the actual video.
         iframe_url = self._html_search_regex(
             r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py
index cee4d6b49..e8513f2c2 100644
--- a/yt_dlp/extractor/foxnews.py
+++ b/yt_dlp/extractor/foxnews.py
@@ -59,10 +59,13 @@ class FoxNewsIE(AMPIE):
     @staticmethod
     def _extract_urls(webpage):
         return [
-            mobj.group('url')
+            f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
             for mobj in re.finditer(
-                r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
-                webpage)]
+                r'''(?x)
+                    <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
+                    (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
+                    (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
+                ''', webpage)]
 
     def _real_extract(self, url):
         host, video_id = self._match_valid_url(url).groups()
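
Note: the foxnews.py hunk above rebuilds a canonical embed URL from the captured id instead of reusing the matched src, so both iframe and embed.js embeds normalize to the same form. A self-contained check of the new pattern against sample markup (toy HTML, made up for illustration):

    import re

    webpage = '''
    <iframe src="//video.foxnews.com/v/video-embed.html?video_id=6310544600112"></iframe>
    <script src="https://video.foxnews.com/v/embed.js?id=6310544600112&w=466"></script>
    '''
    pattern = r'''(?x)
        <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
        (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
        (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
    '''
    for mobj in re.finditer(pattern, webpage):
        print(f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}')
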
diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py
deleted file mode 100644
index 6bd9912f3..000000000
--- a/yt_dlp/extractor/franceculture.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    extract_attributes,
-    int_or_none,
-    traverse_obj,
-    unified_strdate,
-)
-
-
-class FranceCultureIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        # playlist
-        'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
-        'playlist_count': 12,
-        'info_dict': {
-            'id': 'hasta-dente',
-            'title': 'Hasta Dente',
-            'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20201024',
-        },
-        'playlist': [{
-            'info_dict': {
-                'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
-                'ext': 'mp3',
-                'title': 'Jeudi, vous avez dit bizarre ?',
-                'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
-                'duration': 604,
-                'upload_date': '20201024',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'timestamp': 1603576680
-            },
-        },
-        ],
-    }, {
-        'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
-        'info_dict': {
-            'id': 'rendez-vous-au-pays-des-geeks',
-            'display_id': 'rendez-vous-au-pays-des-geeks',
-            'ext': 'mp3',
-            'title': 'Rendez-vous au pays des geeks',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20140301',
-            'vcodec': 'none',
-            'duration': 3569,
-        },
-    }, {
-        # no thumbnail
-        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        info = {
-            'id': display_id,
-            'title': self._html_search_regex(
-                r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
-                webpage, 'title', default=self._og_search_title(webpage)),
-            'description': self._html_search_regex(
-                r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader': self._html_search_regex(
-                r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
-            'upload_date': unified_strdate(self._html_search_regex(
-                r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
-        }
-
-        playlist_data = self._search_regex(
-            r'''(?sx)
-                <section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
-                (.*?)
-            </section>
-            ''',
-            webpage, 'playlist data', fatal=False, default=None)
-
-        if playlist_data:
-            entries = []
-            for item, item_description in re.findall(
-                    r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
-                    playlist_data):
-
-                item_attributes = extract_attributes(item)
-                entries.append({
-                    'id': item_attributes.get('data-emission-uuid'),
-                    'url': item_attributes.get('data-url'),
-                    'title': item_attributes.get('data-diffusion-title'),
-                    'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')),
-                    'description': item_description,
-                    'timestamp': int_or_none(item_attributes.get('data-start-time')),
-                    'thumbnail': info['thumbnail'],
-                    'uploader': info['uploader'],
-                })
-
-            return {
-                '_type': 'playlist',
-                'entries': entries,
-                **info
-            }
-
-        video_data = extract_attributes(self._search_regex(
-            r'''(?sx)
-                (?:
-                    </h1>|
-                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
-                ).*?
-                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
-            ''',
-            webpage, 'video data'))
-
-        video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
-        ext = determine_ext(video_url.lower())
-
-        return {
-            'display_id': display_id,
-            'url': video_url,
-            'ext': ext,
-            'vcodec': 'none' if ext == 'mp3' else None,
-            'duration': int_or_none(video_data.get('data-duration')),
-            **info
-        }
diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py
new file mode 100644
index 000000000..f38bae90b
--- /dev/null
+++ b/yt_dlp/extractor/freetv.py
@@ -0,0 +1,141 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, urlencode_postdata
+
+
+class FreeTvBaseIE(InfoExtractor):
+    def _get_api_response(self, content_id, resource_type, postdata):
+        return self._download_json(
+            'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
+            content_id, data=urlencode_postdata(postdata),
+            note=f'Downloading {content_id} {resource_type} JSON')['data']
+
+
+class FreeTvMoviesIE(FreeTvBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
+        'md5': 'dc62d5abf0514726640077cd1591aa92',
+        'info_dict': {
+            'id': '428021',
+            'title': 'Atrápame Si Puedes',
+            'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
+            'ext': 'mp4',
+        }
+    }, {
+        'url': 'https://www.freetv.com/peliculas/monstruoso/',
+        'md5': '509c15c68de41cb708d1f92d071f20aa',
+        'info_dict': {
+            'id': '377652',
+            'title': 'Monstruoso',
+            'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
+            'ext': 'mp4',
+        }
+    }]
+
+    def _extract_video(self, content_id, action='olyott_video_play'):
+        api_response = self._get_api_response(content_id, 'video', {
+            'action': action,
+            'contentID': content_id,
+        })
+
+        video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': traverse_obj(api_response, ('displayMeta', 'title')),
+            'description': traverse_obj(api_response, ('displayMeta', 'desc')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        return self._extract_video(
+            self._search_regex((
+                r'class=["\'][^>]+postid-(?P<video_id>\d+)',
+                r'<link[^>]+freetv.com/\?p=(?P<video_id>\d+)',
+                r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
+            ), webpage, 'video id', group='video_id'))
+
+
+class FreeTvIE(FreeTvBaseIE):
+    IE_NAME = 'freetv:series'
+    _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.freetv.com/series/el-detective-l/',
+        'info_dict': {
+            'id': 'el-detective-l',
+            'title': 'El Detective L',
+            'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
+        },
+        'playlist_count': 24,
+    }, {
+        'url': 'https://www.freetv.com/series/esmeraldas/',
+        'info_dict': {
+            'id': 'esmeraldas',
+            'title': 'Esmeraldas',
+            'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
+        },
+        'playlist_count': 62,
+    }, {
+        'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
+        'info_dict': {
+            'id': 'las-aventuras-de-leonardo',
+            'title': 'Las Aventuras de Leonardo',
+            'description': 'md5:0c47130846c141120a382aca059288f6'
+        },
+        'playlist_count': 13,
+    },
+    ]
+
+    def _extract_series_season(self, season_id, series_title):
+        episodes = self._get_api_response(season_id, 'series', {
+            'contentID': season_id,
+            'action': 'olyott_get_dynamic_series_content',
+            'type': 'list',
+            'perPage': '1000',
+        })['1']
+
+        for episode in episodes:
+            video_id = str(episode['contentID'])
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
+            self._sort_formats(formats)
+
+            yield {
+                'id': video_id,
+                'title': episode.get('fullTitle'),
+                'description': episode.get('description'),
+                'formats': formats,
+                'subtitles': subtitles,
+                'thumbnail': episode.get('thumbnail'),
+                'series': series_title,
+                'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
+                'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
+                'season_number': traverse_obj(
+                    episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
+                'episode_number': traverse_obj(
+                    episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
+            }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
+        description = self._html_search_regex(
+            r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
+            webpage, 'description', group='description', fatal=False)
+
+        return self.playlist_result(
+            itertools.chain.from_iterable(
+                self._extract_series_season(season_id, title)
+                for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
+            display_id, title, description)
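
Note: both FreeTv classes above funnel through _get_api_response, a POST against WordPress's admin-ajax.php. A rough standalone sketch of that call (endpoint and parameter names are taken from the extractor; running it outside yt-dlp is untested and error handling is omitted):

    import urllib.parse
    import urllib.request

    postdata = urllib.parse.urlencode({
        'action': 'olyott_video_play',
        'contentID': '428021',
    }).encode()
    req = urllib.request.Request(
        'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php', data=postdata)
    # The stream URL is expected under data.displayMeta.streamURLVideo:
    # body = json.load(urllib.request.urlopen(req))['data']
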
diff --git a/yt_dlp/extractor/fuyintv.py b/yt_dlp/extractor/fuyintv.py
new file mode 100644
index 000000000..197901d57
--- /dev/null
+++ b/yt_dlp/extractor/fuyintv.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class FuyinTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
+    _TESTS = [{
+        'url': 'https://www.fuyin.tv/html/2733/44129.html',
+        'info_dict': {
+            'id': '44129',
+            'ext': 'mp4',
+            'title': '第1集',
+            'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(
+            'https://www.fuyin.tv/api/api/tv.movie/url',
+            video_id, query={'urlid': f'{video_id}'})
+        webpage = self._download_webpage(url, video_id, fatal=False)
+
+        return {
+            'id': video_id,
+            'title': traverse_obj(json_data, ('data', 'title')),
+            'url': json_data['data']['url'],
+            'ext': 'mp4',
+            'description': self._html_search_meta('description', webpage),
+        }
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index f594d02c2..c2f754453 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,5 +1,6 @@
 import os
 import re
+import urllib.parse
 import xml.etree.ElementTree
 
 from .ant1newsgr import Ant1NewsGrEmbedIE
@@ -69,11 +70,13 @@ from .spankwire import SpankwireIE
 from .sportbox import SportBoxIE
 from .spotify import SpotifyBaseIE
 from .springboardplatform import SpringboardPlatformIE
+from .substack import SubstackIE
 from .svt import SVTIE
 from .teachable import TeachableIE
 from .ted import TedEmbedIE
 from .theplatform import ThePlatformIE
 from .threeqsdn import ThreeQSDNIE
+from .tiktok import TikTokIE
 from .tnaflix import TNAFlixNetworkEmbedIE
 from .tube8 import Tube8IE
 from .tunein import TuneInBaseIE
@@ -104,12 +107,7 @@ from .yapfiles import YapFilesIE
 from .youporn import YouPornIE
 from .youtube import YoutubeIE
 from .zype import ZypeIE
-from ..compat import (
-    compat_etree_fromstring,
-    compat_str,
-    compat_urllib_parse_unquote,
-    compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
 from ..utils import (
     KNOWN_EXTENSIONS,
     ExtractorError,
@@ -129,6 +127,7 @@ from ..utils import (
     sanitized_Request,
     smuggle_url,
     str_or_none,
+    try_call,
     unescapeHTML,
     unified_timestamp,
     unsmuggle_url,
@@ -2526,6 +2525,118 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20220504',
             },
         },
+        {
+            # Webpage contains double BOM
+            'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
+            'md5': 'df02cadc719dcc63d43288366f037754',
+            'info_dict': {
+                'id': 'paris-d-moll',
+                'ext': 'mp4',
+                'upload_date': '20220518',
+                'title': 'Paris d-moll',
+                'description': 'md5:319e37ea5542293db37e1e13072fe330',
+                'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
+                'timestamp': 1652833414,
+                'age_limit': 0,
+            }
+        },
+        {
+            'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
+            'md5': '198bde8bed23d0b23c70725c83c9b6d9',
+            'info_dict': {
+                'id': '53602801',
+                'ext': 'mpga',
+                'title': 'Interstellar',
+                'description': 'Listen now | Episode One',
+                'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
+                'uploader': 'Molly Movie Club',
+                'uploader_id': '839621',
+            },
+        },
+        {
+            'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
+            'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
+            'info_dict': {
+                'id': '57962052',
+                'ext': 'mpga',
+                'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
+                'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
+                'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
+                'uploader': 'Blocked and Reported',
+                'uploader_id': '500230',
+            },
+        },
+        {
+            'url': 'https://www.skimag.com/video/ski-people-1980/',
+            'info_dict': {
+                'id': 'ski-people-1980',
+                'title': 'Ski People (1980)',
+            },
+            'playlist_count': 1,
+            'playlist': [{
+                'md5': '022a7e31c70620ebec18deeab376ee03',
+                'info_dict': {
+                    'id': 'YTmgRiNU',
+                    'ext': 'mp4',
+                    'title': '1980 Ski People',
+                    'timestamp': 1610407738,
+                    'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+                    'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+                    'duration': 5688.0,
+                    'upload_date': '20210111',
+                }
+            }]
+        },
+        {
+            'note': 'Rumble embed',
+            'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+            'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+            'info_dict': {
+                'id': 'vb0ofn',
+                'ext': 'mp4',
+                'timestamp': 1612662578,
+                'uploader': 'LovingMontana',
+                'channel': 'LovingMontana',
+                'upload_date': '20210207',
+                'title': 'Winter-loving dog helps girls dig a snow fort ',
+                'channel_url': 'https://rumble.com/c/c-546523',
+                'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
+                'duration': 103,
+            }
+        },
+        {
+            'note': 'Rumble JS embed',
+            'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
+            'md5': '4701209ac99095592e73dbba21889690',
+            'info_dict': {
+                'id': 'v15eqxl',
+                'ext': 'mp4',
+                'channel': 'Mr Producer Media',
+                'duration': 92,
+                'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
+                'channel_url': 'https://rumble.com/c/RichSementa',
+                'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+                'timestamp': 1654892716,
+                'uploader': 'Mr Producer Media',
+                'upload_date': '20220610',
+            }
+        },
+        {
+            'note': 'JSON LD with multiple @type',
+            'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
+            'md5': 'c7949f34f57273013fb7ccb1156393db',
+            'info_dict': {
+                'id': 'ipy2AcGL',
+                'ext': 'mp4',
+                'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
+                'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
+                'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
+                'timestamp': 1586577474,
+                'upload_date': '20200411',
+                'age_limit': 0,
+                'duration': 111.0,
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
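
Note: in the _extract_rss rewrite that follows, the old if/elif ladder over <itunes:explicit> collapses into a dict lookup, with unknown or missing values falling through to None:

    for raw in ('true', 'yes', 'False', 'clean', None):
        age_limit = {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((raw or '').lower())
        print(raw, '->', age_limit)
    # true -> 18, yes -> 18, False -> 0, clean -> None, None -> None
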
@@ -2536,66 +2647,44 @@ class GenericIE(InfoExtractor):
         self._downloader.write_debug(f'Identified a {name}')
 
     def _extract_rss(self, url, video_id, doc):
-        playlist_title = doc.find('./channel/title').text
-        playlist_desc_el = doc.find('./channel/description')
-        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-
         NS_MAP = {
             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
         }
 
         entries = []
         for it in doc.findall('./channel/item'):
-            next_url = None
-            enclosure_nodes = it.findall('./enclosure')
-            for e in enclosure_nodes:
-                next_url = e.attrib.get('url')
-                if next_url:
-                    break
-
-            if not next_url:
-                next_url = xpath_text(it, 'link', fatal=False)
-
+            next_url = next(
+                (e.attrib.get('url') for e in it.findall('./enclosure')),
+                xpath_text(it, 'link', fatal=False))
             if not next_url:
                 continue
 
-            if it.find('guid').text is not None:
-                next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})
+            guid = try_call(lambda: it.find('guid').text)
+            if guid:
+                next_url = smuggle_url(next_url, {'force_videoid': guid})
 
             def itunes(key):
-                return xpath_text(
-                    it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
-                    default=None)
-
-            duration = itunes('duration')
-            explicit = (itunes('explicit') or '').lower()
-            if explicit in ('true', 'yes'):
-                age_limit = 18
-            elif explicit in ('false', 'no'):
-                age_limit = 0
-            else:
-                age_limit = None
+                return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)
 
             entries.append({
                 '_type': 'url_transparent',
                 'url': next_url,
-                'title': it.find('title').text,
+                'title': try_call(lambda: it.find('title').text),
                 'description': xpath_text(it, 'description', default=None),
-                'timestamp': unified_timestamp(
-                    xpath_text(it, 'pubDate', default=None)),
-                'duration': int_or_none(duration) or parse_duration(duration),
+                'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
+                'duration': parse_duration(itunes('duration')),
                 'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
                 'episode': itunes('title'),
                 'episode_number': int_or_none(itunes('episode')),
                 'season_number': int_or_none(itunes('season')),
-                'age_limit': age_limit,
+                'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
             })
 
         return {
             '_type': 'playlist',
            'id': url,
-            'title': playlist_title,
-            'description': playlist_desc,
+            'title': try_call(lambda: doc.find('./channel/title').text),
+            'description': try_call(lambda: doc.find('./channel/description').text),
             'entries': entries,
         }
@@ -2610,7 +2699,7 @@
 
         title = self._html_search_meta('DC.title', webpage, fatal=True)
 
-        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
         camtasia_cfg = self._download_xml(
             camtasia_url, video_id,
             note='Downloading camtasia configuration',
@@ -2626,7 +2715,7 @@
             entries.append({
                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
                 'title': f'{title} - {n.tag}',
-                'url': compat_urlparse.urljoin(url, url_n.text),
+                'url': urllib.parse.urljoin(url, url_n.text),
                 'duration': float_or_none(n.find('./duration').text),
             })
 
@@ -2678,7 +2767,7 @@
         if url.startswith('//'):
             return self.url_result(self.http_scheme() + url)
 
-        parsed_url = compat_urlparse.urlparse(url)
+        parsed_url = urllib.parse.urlparse(url)
         if not parsed_url.scheme:
             default_search = self.get_param('default_search')
             if default_search is None:
@@ -2754,7 +2843,7 @@
             m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
             if m:
                 self.report_detected('direct video link')
-                format_id = compat_str(m.group('format_id'))
+                format_id = str(m.group('format_id'))
                 subtitles = {}
                 if format_id.endswith('mpegurl'):
                     formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
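
Note: the try_call helper imported into generic.py above wraps calls that may raise on missing data, e.g. it.find('guid').text when there is no <guid> element. A simplified model (the real yt_dlp.utils.try_call also accepts call arguments and an expected_type filter):

    def try_call(*funcs):
        # Return the result of the first callable that doesn't raise.
        for f in funcs:
            try:
                return f()
            except (AttributeError, KeyError, TypeError, IndexError):
                continue

    class Item:
        def find(self, tag):
            return None  # no such child element

    print(try_call(lambda: Item().find('guid').text))  # -> None, not AttributeError
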
@@ -2873,7 +2962,7 @@
         # Unescaping the whole page allows to handle those cases in a generic way
         # FIXME: unescaping the whole page may break URLs, commenting out for now.
         # There probably should be a second run of generic extractor on unescaped webpage.
-        # webpage = compat_urllib_parse_unquote(webpage)
+        # webpage = urllib.parse.unquote(webpage)
 
         # Unescape squarespace embeds to be detected by generic extractor,
         # see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -2975,7 +3064,7 @@
         if vimeo_urls:
             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
 
-        vhx_url = VHXEmbedIE._extract_url(webpage)
+        vhx_url = VHXEmbedIE._extract_url(url, webpage)
         if vhx_url:
             return self.url_result(vhx_url, VHXEmbedIE.ie_key())
 
@@ -3023,6 +3112,7 @@
         wistia_urls = WistiaIE._extract_urls(webpage)
         if wistia_urls:
             playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+            playlist['entries'] = list(playlist['entries'])
             for entry in playlist['entries']:
                 entry.update({
                     '_type': 'url_transparent',
@@ -3042,6 +3132,11 @@
             # Don't set the extractor because it can be a track url or an album
             return self.url_result(burl)
 
+        # Check for Substack custom domains
+        substack_url = SubstackIE._extract_url(webpage, url)
+        if substack_url:
+            return self.url_result(substack_url, SubstackIE)
+
         # Look for embedded Vevo player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
@@ -3140,7 +3235,7 @@
             return self.url_result(mobj.group('url'))
         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
         if mobj is not None:
-            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+            return self.url_result(urllib.parse.unquote(mobj.group('url')))
 
         # Look for funnyordie embed
         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -3393,7 +3488,7 @@
             r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
         if mobj is not None:
             return self.url_result(
-                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+                urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
 
         # Look for Senate ISVP iframe
         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@@ -3626,7 +3721,7 @@
         if mediasite_urls:
             entries = [
                 self.url_result(smuggle_url(
-                    compat_urlparse.urljoin(url, mediasite_url),
+                    urllib.parse.urljoin(url, mediasite_url),
                     {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
                 for mediasite_url in mediasite_urls]
             return self.playlist_result(entries, video_id, video_title)
@@ -3762,6 +3857,11 @@
         if ruutu_urls:
             return self.playlist_from_matches(ruutu_urls, video_id, video_title)
 
+        # Look for Tiktok embeds
+        tiktok_urls = TikTokIE._extract_urls(webpage)
+        if tiktok_urls:
+            return self.playlist_from_matches(tiktok_urls, video_id, video_title)
+
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
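
Note: the Substack hook above follows the usual shape of an embed-detection classmethod in this codebase: given the page HTML and its URL, return a URL for the dedicated extractor or None. A hypothetical sketch of such a hook (the real SubstackIE._extract_url detection logic differs; the marker below is an assumption):

    import re

    class SubstackLikeIE:
        @staticmethod
        def _extract_url(webpage, url):
            # Assumed marker: Substack pages load assets from substackcdn.com.
            if 'substackcdn.com' not in webpage:
                return None
            mobj = re.search(r'(https?://[^/"\']+/p/[^"\'?#]+)', url)
            return mobj and mobj.group(1)
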
@@ -3816,11 +3916,11 @@
                 subtitles = {}
                 for source in sources:
                     src = source.get('src')
-                    if not src or not isinstance(src, compat_str):
+                    if not src or not isinstance(src, str):
                         continue
-                    src = compat_urlparse.urljoin(url, src)
+                    src = urllib.parse.urljoin(url, src)
                     src_type = source.get('type')
-                    if isinstance(src_type, compat_str):
+                    if isinstance(src_type, str):
                         src_type = src_type.lower()
                     ext = determine_ext(src).lower()
                     if src_type == 'video/youtube':
@@ -3854,7 +3954,7 @@
                     if not src:
                         continue
                     subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
-                        'url': compat_urlparse.urljoin(url, src),
+                        'url': urllib.parse.urljoin(url, src),
                         'name': sub.get('label'),
                         'http_headers': {
                             'Referer': full_response.geturl(),
@@ -3871,22 +3971,17 @@
         json_ld = self._search_json_ld(webpage, video_id, default={})
         if json_ld.get('url') not in (url, None):
             self.report_detected('JSON LD')
-            if determine_ext(json_ld['url']) == 'm3u8':
-                json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
-                    json_ld['url'], video_id, 'mp4')
-                json_ld.pop('url')
-                self._sort_formats(json_ld['formats'])
-            else:
-                json_ld['_type'] = 'url_transparent'
-                json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
-            return merge_dicts(json_ld, info_dict)
+            return merge_dicts({
+                '_type': 'url_transparent',
+                'url': smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True}),
+            }, json_ld, info_dict)
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
             if RtmpIE.suitable(vurl):
                 return True
-            vpath = compat_urlparse.urlparse(vurl).path
+            vpath = urllib.parse.urlparse(vurl).path
             vext = determine_ext(vpath, None)
             return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -4014,7 +4109,7 @@
         if refresh_header:
             found = re.search(REDIRECT_REGEX, refresh_header)
         if found:
-            new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+            new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
             if new_url != url:
                 self.report_following_redirect(new_url)
                 return {
@@ -4040,8 +4135,8 @@
         for video_url in orderedSet(found):
             video_url = unescapeHTML(video_url)
            video_url = video_url.replace('\\/', '/')
-            video_url = compat_urlparse.urljoin(url, video_url)
-            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+            video_url = urllib.parse.urljoin(url, video_url)
+            video_id = urllib.parse.unquote(os.path.basename(video_url))
 
             # Sometimes, jwplayer extraction will result in a YouTube URL
             if YoutubeIE.suitable(video_url):
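
Note: the JSON-LD branch above now always defers to the target URL as url_transparent, smuggling the original video id along so it survives the hand-off to the next extractor. smuggle_url/unsmuggle_url round-trip the payload through the URL fragment:

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    url = smuggle_url('https://example.com/video.mp4',
                      {'force_videoid': 'abc123', 'to_generic': True})
    # The payload rides in a '#__youtubedl_smuggle=...' fragment.
    plain, data = unsmuggle_url(url)
    print(plain, data['force_videoid'])  # -> https://example.com/video.mp4 abc123
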
diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py
index 9e835a6da..e728598f7 100644
--- a/yt_dlp/extractor/giga.py
+++ b/yt_dlp/extractor/giga.py
@@ -1,13 +1,8 @@
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
-    qualities,
-    compat_str,
-    parse_duration,
-    parse_iso8601,
-    str_to_int,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
 
 
 class GigaIE(InfoExtractor):
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index c0905f86a..d7475b6da 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -264,7 +264,7 @@ class GoogleDriveIE(InfoExtractor):
             subtitles_id = ttsurl.encode('utf-8').decode(
                 'unicode_escape').split('=')[-1]
 
-        self._downloader.cookiejar.clear(domain='.google.com', path='/', name='NID')
+        self.cookiejar.clear(domain='.google.com', path='/', name='NID')
 
         return {
             'id': video_id,
@@ -276,3 +276,59 @@ class GoogleDriveIE(InfoExtractor):
             'automatic_captions': self.extract_automatic_captions(
                 video_id, subtitles_id, hl),
         }
+
+
+class GoogleDriveFolderIE(InfoExtractor):
+    IE_NAME = 'GoogleDrive:Folder'
+    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/drive/folders/(?P<id>[\w-]{28,})'
+    _TESTS = [{
+        'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+        'info_dict': {
+            'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+            'title': 'Forrest'
+        },
+        'playlist_count': 3,
+    }]
+    _BOUNDARY = '=====vc17a3rwnndj====='
+    _REQUEST = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'{folder_id}'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken={page_token}&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key={key} HTTP/1.1"
+    _DATA = f'''--{_BOUNDARY}
+content-type: application/http
+content-transfer-encoding: binary
+
+GET %s
+
+--{_BOUNDARY}
+'''
+
+    def _call_api(self, folder_id, key, data, **kwargs):
+        response = self._download_webpage(
+            'https://clients6.google.com/batch/drive/v2beta',
+            folder_id, data=data.encode('utf-8'),
+            headers={
+                'Content-Type': 'text/plain;charset=UTF-8;',
+                'Origin': 'https://drive.google.com',
+            }, query={
+                '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"',
+                'key': key
+            }, **kwargs)
+        return self._search_json('', response, 'api response', folder_id, **kwargs) or {}
+
+    def _get_folder_items(self, folder_id, key):
+        page_token = ''
+        while page_token is not None:
+            request = self._REQUEST.format(folder_id=folder_id, page_token=page_token, key=key)
+            page = self._call_api(folder_id, key, self._DATA % request)
+            yield from page['items']
+            page_token = page.get('nextPageToken')
+
+    def _real_extract(self, url):
+        folder_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, folder_id)
+        key = self._search_regex(r'"(\w{39})"', webpage, 'key')
+
+        folder_info = self._call_api(folder_id, key, self._DATA % f'/drive/v2beta/files/{folder_id} HTTP/1.1', fatal=False)
+
+        return self.playlist_from_matches(
+            self._get_folder_items(folder_id, key), folder_id, folder_info.get('title'),
+            ie=GoogleDriveIE, getter=lambda item: f'https://drive.google.com/file/d/{item["id"]}')
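
Note: _get_folder_items above drains Drive's paginated listing with a small generator: page_token starts as '' (falsy but not None, so the first page is still fetched) and iteration stops once nextPageToken disappears. The same pattern in isolation, with fetch_page as a hypothetical stand-in for one _call_api round-trip:

    def fetch_page(token):
        # Stand-in returning canned pages instead of hitting the batch API.
        pages = {'': ({'id': 'a'}, {'id': 'b'}), 'next': ({'id': 'c'},)}
        return {'items': pages[token], 'nextPageToken': 'next' if token == '' else None}

    def iter_items():
        page_token = ''
        while page_token is not None:
            page = fetch_page(page_token)
            yield from page['items']
            page_token = page.get('nextPageToken')

    print([item['id'] for item in iter_items()])  # -> ['a', 'b', 'c']
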
int_or_none, - strip_or_none, - xpath_attr, - xpath_text, -) +from ..utils import unified_strdate class InaIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)' + _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:[^/]+/)?(?:video|audio)/(?P<id>\w+)' _TESTS = [{ - 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', - 'md5': 'a667021bf2b41f8dc6049479d9bb38a3', + 'url': 'https://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', + 'md5': 'c5a09e5cb5604ed10709f06e7a377dda', 'info_dict': { 'id': 'I12055569', 'ext': 'mp4', 'title': 'François Hollande "Je crois que c\'est clair"', - 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663', + 'description': 'md5:08201f1c86fb250611f0ba415d21255a', + 'upload_date': '20070712', + 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg', } }, { 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html', @@ -31,53 +27,37 @@ class InaIE(InfoExtractor): }, { 'url': 'http://m.ina.fr/video/I12055569', 'only_matching': True, + }, { + 'url': 'https://www.ina.fr/ina-eclaire-actu/video/cpb8205116303/les-jeux-electroniques', + 'md5': '4b8284a9a3a184fdc7e744225b8251e7', + 'info_dict': { + 'id': 'CPB8205116303', + 'ext': 'mp4', + 'title': 'Les jeux électroniques', + 'description': 'md5:e09f7683dad1cc60b74950490127d233', + 'upload_date': '19821204', + 'duration': 657, + 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/203/CPB8205116303.jpeg', + } }] def _real_extract(self, url): - video_id = self._match_id(url) - info_doc = self._download_xml( - 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id) - item = info_doc.find('channel/item') - title = xpath_text(item, 'title', fatal=True) - media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/') - content = item.find(media_ns_xpath('content')) + video_id = self._match_id(url).upper() + webpage = self._download_webpage(url, video_id) - get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url') - formats = [] - for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)): - q_url = get_furl(q) - if not q_url: - continue - formats.append({ - 'format_id': q, - 'url': q_url, - 'width': w, - 'height': h, - }) - if not formats: - furl = get_furl('player') or content.attrib['url'] - ext = determine_ext(furl) - formats = [{ - 'url': furl, - 'vcodec': 'none' if ext == 'mp3' else None, - 'ext': ext, - }] + api_url = self._html_search_regex( + r'asset-details-url\s*=\s*["\'](?P<api_url>[^"\']+)', + webpage, 'api_url').replace(video_id, f'{video_id}.json') - thumbnails = [] - for thumbnail in content.findall(media_ns_xpath('thumbnail')): - thumbnail_url = thumbnail.get('url') - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'height': int_or_none(thumbnail.get('height')), - 'width': int_or_none(thumbnail.get('width')), - }) + api_response = self._download_json(api_url, video_id) return { 'id': video_id, - 'formats': formats, - 'title': title, - 'description': strip_or_none(xpath_text(item, 'description')), - 'thumbnails': thumbnails, + 'url': api_response['resourceUrl'], + 'ext': {'video': 'mp4', 'audio': 'mp3'}.get(api_response.get('type')), + 'title': api_response.get('title'), + 'description': api_response.get('description'), + 'upload_date': unified_strdate(api_response.get('dateOfBroadcast')), + 'duration': api_response.get('duration'), + 'thumbnail': 
api_response.get('resourceThumbnail'), } diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 05000e2fb..5a824b500 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -410,7 +410,7 @@ class InstagramIE(InstagramBaseIE): if nodes: return self.playlist_result( self._extract_nodes(nodes, True), video_id, - format_field(username, template='Post by %s'), description) + format_field(username, None, 'Post by %s'), description) video_url = self._og_search_video_url(webpage, secure=False) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index a0298f1a1..5c316687c 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -351,7 +351,7 @@ class IqIE(InfoExtractor): ''' def _extract_vms_player_js(self, webpage, video_id): - player_js_cache = self._downloader.cache.load('iq', 'player_js') + player_js_cache = self.cache.load('iq', 'player_js') if player_js_cache: return player_js_cache webpack_js_url = self._proto_relative_url(self._search_regex( @@ -364,7 +364,7 @@ class IqIE(InfoExtractor): f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js', video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or '' if 'vms request' in module_js: - self._downloader.cache.store('iq', 'player_js', module_js) + self.cache.store('iq', 'player_js', module_js) return module_js raise ExtractorError('Unable to extract player JS') @@ -440,7 +440,7 @@ class IqIE(InfoExtractor): preview_time = traverse_obj( initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False) if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none): - self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds')) + self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds')) # TODO: Extract audio-only formats for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])): diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index 4b88da35f..f77c5d44d 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,15 +1,16 @@ +import itertools import re -import urllib +import urllib.parse from .common import InfoExtractor from ..utils import ( int_or_none, mimetype2ext, remove_end, + strip_or_none, + unified_strdate, url_or_none, urljoin, - unified_strdate, - strip_or_none, ) @@ -171,37 +172,70 @@ class IwaraUserIE(IwaraBaseIE): IE_NAME = 'iwara:user' _TESTS = [{ - 'url': 'https://ecchi.iwara.tv/users/CuteMMD', + 'note': 'number of all videos page is just 1 page. less than 40 videos', + 'url': 'https://ecchi.iwara.tv/users/infinityyukarip', 'info_dict': { - 'id': 'CuteMMD', + 'title': 'Uploaded videos from Infinity_YukariP', + 'id': 'infinityyukarip', + 'uploader': 'Infinity_YukariP', + 'uploader_id': 'infinityyukarip', }, - 'playlist_mincount': 198, + 'playlist_mincount': 39, }, { - # urlencoded - 'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF', + 'note': 'no even all videos page. 
probably less than 10 videos', + 'url': 'https://ecchi.iwara.tv/users/mmd-quintet', 'info_dict': { - 'id': '咕嘿嘿', + 'title': 'Uploaded videos from mmd quintet', + 'id': 'mmd-quintet', + 'uploader': 'mmd quintet', + 'uploader_id': 'mmd-quintet', }, - 'playlist_mincount': 141, + 'playlist_mincount': 6, + }, { + 'note': 'has paging. more than 40 videos', + 'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls', + 'info_dict': { + 'title': 'Uploaded videos from TheBlackbirdCalls', + 'id': 'theblackbirdcalls', + 'uploader': 'TheBlackbirdCalls', + 'uploader_id': 'theblackbirdcalls', + }, + 'playlist_mincount': 420, + }, { + 'note': 'foreign chars in URL. there must be foreign characters in URL', + 'url': 'https://ecchi.iwara.tv/users/ぶた丼', + 'info_dict': { + 'title': 'Uploaded videos from ぶた丼', + 'id': 'ぶた丼', + 'uploader': 'ぶた丼', + 'uploader_id': 'ぶた丼', + }, + 'playlist_mincount': 170, }] - def _entries(self, playlist_id, base_url, webpage): - yield from self._extract_playlist(base_url, webpage) - - page_urls = re.findall( - r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage) - - for n, path in enumerate(page_urls, 2): + def _entries(self, playlist_id, base_url): + webpage = self._download_webpage( + f'{base_url}/users/{playlist_id}', playlist_id) + videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None) + if not videos_url: + yield from self._extract_playlist(base_url, webpage) + return + + videos_url = urljoin(base_url, videos_url) + + for n in itertools.count(1): + page = self._download_webpage( + videos_url, playlist_id, note=f'Downloading playlist page {n}', + query={'page': str(n - 1)} if n > 1 else {}) yield from self._extract_playlist( - base_url, self._download_webpage( - urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}')) + base_url, page) + + if f'page={n}' not in page: + break def _real_extract(self, url): playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url') playlist_id = urllib.parse.unquote(playlist_id) - webpage = self._download_webpage( - f'{base_url}/users/{playlist_id}/videos', playlist_id) - return self.playlist_result( - self._entries(playlist_id, base_url, webpage), playlist_id) + self._entries(playlist_id, base_url), playlist_id) diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py new file mode 100644 index 000000000..163edf480 --- /dev/null +++ b/yt_dlp/extractor/ixigua.py @@ -0,0 +1,84 @@ +import base64 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + get_element_by_id, + int_or_none, + js_to_json, + str_or_none, + traverse_obj, +) + + +class IxiguaIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+' + _TESTS = [{ + 'url': 'https://www.ixigua.com/6996881461559165471', + 'info_dict': { + 'id': '6996881461559165471', + 'ext': 'mp4', + 'title': '盲目涉水风险大,亲身示范高水位行车注意事项', + 'description': 'md5:8c82f46186299add4a1c455430740229', + 'tags': ['video_car'], + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'uploader': '懂车帝原创', + 'uploader_id': '6480145787', + 'thumbnail': r're:^https?://.+\.(avif|webp)', + 'timestamp': 1629088414, + 'duration': 1030, + } + }] + + def _get_json_data(self, webpage, video_id): + js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage) + if not js_data: + if self._cookies_passed: + raise ExtractorError('Failed to get SSR_HYDRATED_DATA') + raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True) + + 
return self._parse_json( + js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json) + + def _media_selector(self, json_data): + for path, override in ( + (('video_list', ), {}), + (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}), + (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}), + ): + for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])): + yield { + 'url': base64.b64decode(media['main_url']).decode(), + 'width': int_or_none(media.get('vwidth')), + 'height': int_or_none(media.get('vheight')), + 'fps': int_or_none(media.get('fps')), + 'vcodec': media.get('codec_type'), + 'format_id': str_or_none(media.get('quality_type')), + 'filesize': int_or_none(media.get('size')), + 'ext': 'mp4', + **override, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video'] + + formats = list(self._media_selector(json_data.get('videoResource'))) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': json_data.get('title'), + 'description': json_data.get('video_abstract'), + 'formats': formats, + 'like_count': json_data.get('video_like_count'), + 'duration': int_or_none(json_data.get('duration')), + 'tags': [json_data.get('tag')], + 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')), + 'uploader': traverse_obj(json_data, ('user_info', 'name')), + 'view_count': json_data.get('video_watch_count'), + 'dislike_count': json_data.get('video_unlike_count'), + 'timestamp': int_or_none(json_data.get('video_publish_time')), + } diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py index a01411be1..1c4676e95 100644 --- a/yt_dlp/extractor/joj.py +++ b/yt_dlp/extractor/joj.py @@ -70,7 +70,7 @@ class JojIE(InfoExtractor): r'(\d+)[pP]\.', format_url, 'height', default=None) formats.append({ 'url': format_url, - 'format_id': format_field(height, template='%sp'), + 'format_id': format_field(height, None, '%sp'), 'height': int(height), }) if not formats: diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py index 8dbbb2926..2cb7ca3d7 100644 --- a/yt_dlp/extractor/jwplatform.py +++ b/yt_dlp/extractor/jwplatform.py @@ -5,7 +5,7 @@ from ..utils import unsmuggle_url class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', @@ -37,6 +37,9 @@ class JWPlatformIE(InfoExtractor): webpage) if ret: return ret + mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage) + if mobj: + return [f'jwplatform:{mobj.group(1)}'] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index afad279bd..f4092aa71 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -382,5 +382,5 @@ class KalturaIE(InfoExtractor): 'duration': info.get('duration'), 'timestamp': info.get('createdAt'), 'uploader_id': format_field(info, 'userId', ignore=('None', None)), - 'view_count': 
info.get('plays'), + 'view_count': int_or_none(info.get('plays')), } diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py index 79f9c7fa7..1c2d5c01c 100644 --- a/yt_dlp/extractor/keezmovies.py +++ b/yt_dlp/extractor/keezmovies.py @@ -68,7 +68,7 @@ class KeezMoviesIE(InfoExtractor): video_url, title, 32).decode('utf-8') formats.append({ 'url': format_url, - 'format_id': format_field(height, template='%dp'), + 'format_id': format_field(height, None, '%dp'), 'height': height, 'tbr': tbr, }) diff --git a/yt_dlp/extractor/kicker.py b/yt_dlp/extractor/kicker.py new file mode 100644 index 000000000..a2c7dd4e8 --- /dev/null +++ b/yt_dlp/extractor/kicker.py @@ -0,0 +1,55 @@ +from .common import InfoExtractor +from .dailymotion import DailymotionIE + + +class KickerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)kicker\.(?:de)/(?P<id>[\w-]+)/video' + _TESTS = [{ + 'url': 'https://www.kicker.de/pogba-dembel-co-die-top-11-der-abloesefreien-spieler-905049/video', + 'info_dict': { + 'id': 'km04mrK0DrRAVxy2GcA', + 'title': 'md5:b91d145bac5745ac58d5479d8347a875', + 'ext': 'mp4', + 'duration': 350, + 'description': 'md5:a5a3dd77dbb6550dbfb997be100b9998', + 'uploader_id': 'x2dfupo', + 'timestamp': 1654677626, + 'like_count': int, + 'uploader': 'kicker.de', + 'view_count': int, + 'age_limit': 0, + 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080', + 'tags': ['published', 'category.InternationalSoccer'], + 'upload_date': '20220608' + } + }, { + 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video', + 'info_dict': { + 'id': 'k2omNsJKdZ3TxwxYSFJ', + 'title': 'md5:72ec24d7f84b8436fe1e89d198152adf', + 'ext': 'mp4', + 'uploader_id': 'x2dfupo', + 'duration': 331, + 'timestamp': 1652966015, + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TxU4Z1YYCmtisTbMq/x1080', + 'tags': ['FELIX KROOS', 'EINFACH MAL LUPPEN', 'KROOS', 'FSV FORTUNA PANKOW', 'published', 'category.Amateurs', 'marketingpreset.Spreekick'], + 'age_limit': 0, + 'view_count': int, + 'upload_date': '20220519', + 'uploader': 'kicker.de', + 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f', + 'like_count': int, + } + }] + + def _real_extract(self, url): + video_slug = self._match_id(url) + + webpage = self._download_webpage(url, video_slug) + dailymotion_video_id = self._search_regex( + r'data-dmprivateid\s*=\s*[\'"](?P<video_id>\w+)', webpage, + 'video id', group='video_id') + + return self.url_result( + f'https://www.dailymotion.com/video/{dailymotion_video_id}', + ie=DailymotionIE, video_title=self._html_extract_title(webpage)) diff --git a/yt_dlp/extractor/kth.py b/yt_dlp/extractor/kth.py new file mode 100644 index 000000000..e17c6db91 --- /dev/null +++ b/yt_dlp/extractor/kth.py @@ -0,0 +1,28 @@ +from .common import InfoExtractor +from ..utils import smuggle_url + + +class KTHIE(InfoExtractor): + _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)' + _TEST = { + 'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9', + 'md5': 'd83ada6d00ca98b73243a88efe19e8a6', + 'info_dict': { + 'id': '0_uoop6oz9', + 'ext': 'mp4', + 'title': 'md5:bd1d6931facb6828762a33e6ce865f37', + 'thumbnail': 're:https?://.+/thumbnail/.+', + 'duration': 3516, + 'timestamp': 1647345358, + 'upload_date': '20220315', + 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + result = self.url_result( + smuggle_url('kaltura:308:%s' % video_id, 
{ + 'service_url': 'https://api.kaltura.nordu.net'}), + 'Kaltura') + return result diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py index f1221ef1b..4fec2c2b2 100644 --- a/yt_dlp/extractor/kusi.py +++ b/yt_dlp/extractor/kusi.py @@ -1,10 +1,10 @@ import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote_plus from ..utils import ( - int_or_none, float_or_none, + int_or_none, timeconvert, update_url_query, xpath_text, @@ -66,7 +66,7 @@ class KUSIIE(InfoExtractor): formats = [] for quality in quality_options: formats.append({ - 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']), + 'url': urllib.parse.unquote_plus(quality.attrib['url']), 'height': int_or_none(quality.attrib.get('height')), 'width': int_or_none(quality.attrib.get('width')), 'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000), diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py index 7ba666d06..f14198cfd 100644 --- a/yt_dlp/extractor/lastfm.py +++ b/yt_dlp/extractor/lastfm.py @@ -15,7 +15,7 @@ class LastFMPlaylistBaseIE(InfoExtractor): for page_number in range(start_page_number, (last_page_number or start_page_number) + 1): webpage = self._download_webpage( url, playlist_id, - note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')), + note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')), query={'page': page_number}) page_entries = [ self.url_result(player_url, 'Youtube') diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 953ce2e18..909720e8b 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -192,10 +192,11 @@ class LBRYIE(LBRYBaseIE): claim_id, is_live = result['signing_channel']['claim_id'], True headers = {'referer': 'https://player.odysee.live/'} live_data = self._download_json( - f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id, + 'https://api.odysee.live/livestream/is_live', claim_id, + query={'channel_claim_id': claim_id}, note='Downloading livestream JSON metadata')['data'] - streaming_url = final_url = live_data.get('url') - if not final_url and not live_data.get('live'): + streaming_url = final_url = live_data.get('VideoURL') + if not final_url and not live_data.get('Live'): self.raise_no_formats('This stream is not live', True, claim_id) else: raise UnsupportedError(url) diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py index 63b6c002a..09c512e50 100644 --- a/yt_dlp/extractor/line.py +++ b/yt_dlp/extractor/line.py @@ -34,7 +34,7 @@ class LineLiveBaseIE(InfoExtractor): 'timestamp': int_or_none(item.get('createdAt')), 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'), + 'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'), 'duration': int_or_none(item.get('archiveDuration')), 'view_count': int_or_none(item.get('viewerCount')), 'comment_count': int_or_none(item.get('chatCount')), diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py index 3bb52777f..9ea08ec5a 100644 --- a/yt_dlp/extractor/lnkgo.py +++ b/yt_dlp/extractor/lnkgo.py @@ -1,7 +1,7 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, - compat_str, format_field, int_or_none, parse_iso8601, diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 527b50cb0..5f0a9b42f 100644 --- 
a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -116,7 +116,7 @@ class MedalTVIE(InfoExtractor): author = try_get( hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {} author_id = str_or_none(author.get('id')) - author_url = format_field(author_id, template='https://medal.tv/users/%s') + author_url = format_field(author_id, None, 'https://medal.tv/users/%s') return { 'id': video_id, diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index 60c454dda..f396c1bd3 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -20,10 +20,10 @@ class MediasetIE(ThePlatformBaseIE): (?: mediaset:| https?:// - (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ + (?:\w+\.)+mediaset\.it/ (?: (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_| - player/index\.html\?.*?\bprogramGuid= + player/(?:v\d+/)?index\.html\?.*?\bprogramGuid= ) )(?P<id>[0-9A-Z]{16,}) ''' @@ -159,6 +159,12 @@ class MediasetIE(ThePlatformBaseIE): }, { 'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102', 'only_matching': True, + }, { + 'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402', + 'only_matching': True, + }, { + 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323', + 'only_matching': True, }] @staticmethod @@ -286,7 +292,7 @@ class MediasetShowIE(MediasetIE): _VALID_URL = r'''(?x) (?: https?:// - (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ + (\w+\.)+mediaset\.it/ (?: (?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)? (?:[a-z-]+)_SE(?P<id>\d{12}) diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py index 31fec86d2..048c74e68 100644 --- a/yt_dlp/extractor/metacafe.py +++ b/yt_dlp/extractor/metacafe.py @@ -1,17 +1,14 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse, - compat_urllib_parse_unquote, -) +from ..compat import compat_parse_qs, compat_urllib_parse_unquote from ..utils import ( - determine_ext, ExtractorError, - int_or_none, + determine_ext, get_element_by_attribute, + int_or_none, mimetype2ext, ) @@ -143,7 +140,7 @@ class MetacafeIE(InfoExtractor): headers = { # Disable family filter - 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False})) + 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False})) } # AnyClip videos require the flashversion cookie so that we get the link diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 393d20604..8079bbb39 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -118,7 +118,7 @@ class MindsIE(MindsBaseIE): 'timestamp': int_or_none(entity.get('time_created')), 'uploader': strip_or_none(owner.get('name')), 'uploader_id': uploader_id, - 'uploader_url': format_field(uploader_id, template='https://www.minds.com/%s'), + 'uploader_url': format_field(uploader_id, None, 'https://www.minds.com/%s'), 'view_count': int_or_none(entity.get('play:count')), 'like_count': int_or_none(entity.get('thumbs:up:count')), 'dislike_count': int_or_none(entity.get('thumbs:down:count')), diff --git a/yt_dlp/extractor/mirrorcouk.py b/yt_dlp/extractor/mirrorcouk.py new file mode 100644 index 000000000..7b4f95b4b --- /dev/null +++ b/yt_dlp/extractor/mirrorcouk.py @@ -0,0 +1,98 @@ +from .common import InfoExtractor +from ..utils import unescapeHTML + + 
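+# The Mirror pages embed their player config as HTML-escaped JSON in a
+# <div class="json-placeholder" data-json="..."> attribute; _real_extract
+# below pulls videoData.videoId out of it and delegates to JWPlatform.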
+class MirrorCoUKIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mirror\.co\.uk/[/+[\w-]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.mirror.co.uk/tv/tv-news/love-island-fans-baffled-after-27163139', + 'info_dict': { + 'id': 'voyyS7SV', + 'ext': 'mp4', + 'title': 'Love Island: Gemma Owen enters the villa', + 'description': 'Love Island: Michael Owen\'s daughter Gemma Owen enters the villa.', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/voyyS7SV/poster.jpg?width=720', + 'display_id': '27163139', + 'timestamp': 1654547895, + 'duration': 57.0, + 'upload_date': '20220606', + }, + }, { + 'url': 'https://www.mirror.co.uk/3am/celebrity-news/michael-jacksons-son-blankets-new-25344890', + 'info_dict': { + 'id': 'jyXpdvxp', + 'ext': 'mp4', + 'title': 'Michael Jackson’s son Bigi calls for action on climate change', + 'description': 'md5:d39ceaba2b7a615b4ca6557e7bc40222', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jyXpdvxp/poster.jpg?width=720', + 'display_id': '25344890', + 'timestamp': 1635749907, + 'duration': 56.0, + 'upload_date': '20211101', + }, + }, { + 'url': 'https://www.mirror.co.uk/sport/football/news/antonio-conte-next-tottenham-manager-25346042', + 'info_dict': { + 'id': 'q6FkKa4p', + 'ext': 'mp4', + 'title': 'Nuno sacked by Tottenham after fifth Premier League defeat of the season', + 'description': 'Nuno Espirito Santo has been sacked as Tottenham boss after only four months in charge.', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/q6FkKa4p/poster.jpg?width=720', + 'display_id': '25346042', + 'timestamp': 1635763157, + 'duration': 40.0, + 'upload_date': '20211101', + }, + }, { + 'url': 'https://www.mirror.co.uk/3am/celebrity-news/johnny-depp-splashes-50k-curry-27160737', + 'info_dict': { + 'id': 'IT0oa1nH', + 'ext': 'mp4', + 'title': 'Johnny Depp Leaves The Grand Hotel in Birmingham', + 'description': 'Johnny Depp Leaves The Grand Hotel in Birmingham.', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/IT0oa1nH/poster.jpg?width=720', + 'display_id': '27160737', + 'timestamp': 1654524120, + 'duration': 65.0, + 'upload_date': '20220606', + }, + }, { + 'url': 'https://www.mirror.co.uk/tv/tv-news/love-islands-liam-could-first-27162602', + 'info_dict': { + 'id': 'EaPr5Z2j', + 'ext': 'mp4', + 'title': 'Love Island: Davide reveals plot twist after receiving text', + 'description': 'Love Island: Davide reveals plot twist after receiving text', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EaPr5Z2j/poster.jpg?width=720', + 'display_id': '27162602', + 'timestamp': 1654552597, + 'duration': 23.0, + 'upload_date': '20220606', + }, + }, { + 'url': 'https://www.mirror.co.uk/news/uk-news/william-kate-sent-message-george-27160572', + 'info_dict': { + 'id': 'ygtceXIu', + 'ext': 'mp4', + 'title': 'Prince William and Kate arrive in Wales with George and Charlotte', + 'description': 'Prince William and Kate Middleton arrive in Wales with children Prince George and Princess Charlotte.', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/ygtceXIu/poster.jpg?width=720', + 'display_id': '27160572', + 'timestamp': 1654349678, + 'duration': 106.0, + 'upload_date': '20220604', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + data = self._search_json(r'div\s+class="json-placeholder"\s+data-json="', + webpage, 'data', display_id, transform_source=unescapeHTML)['videoData'] + + return { + '_type': 'url_transparent', + 'url': f'jwplatform:{data["videoId"]}', + 'ie_key': 'JWPlatform', + 'display_id': 
display_id, + } diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index 796f268f4..a77d7e682 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -3,7 +3,6 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_b64decode, - compat_chr, compat_ord, compat_str, compat_urllib_parse_unquote, @@ -72,7 +71,7 @@ class MixcloudIE(MixcloudBaseIE): def _decrypt_xor_cipher(key, ciphertext): """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.""" return ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(k)) + chr(compat_ord(ch) ^ compat_ord(k)) for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index a230d9cdd..c3b063ffe 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -1,13 +1,19 @@ +import itertools import re +from urllib.parse import urlparse, parse_qs from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, dict_get, - ExtractorError, int_or_none, + merge_dicts, parse_duration, + traverse_obj, + try_call, try_get, + unified_timestamp, update_url_query, ) @@ -247,3 +253,134 @@ class NaverLiveIE(InfoExtractor): 'categories': [meta.get('categoryId')], 'is_live': True } + + +class NaverNowIE(NaverBaseIE): + IE_NAME = 'navernow' + _VALID_URL = r'https?://now\.naver\.com/show/(?P<id>[0-9]+)' + _PAGE_SIZE = 30 + _API_URL = 'https://apis.naver.com/now_web/nowcms-api-xhmac/cms/v1' + _TESTS = [{ + 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay=', + 'md5': 'e05854162c21c221481de16b2944a0bc', + 'info_dict': { + 'id': '4759-5901', + 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1650369600, + 'upload_date': '20220419', + 'uploader_id': 'now', + 'view_count': int, + }, + 'params': { + 'noplaylist': True, + } + }, { + 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=', + 'md5': '9f6118e398aa0f22b2152f554ea7851b', + 'info_dict': { + 'id': '4759-1078', + 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg', + 'upload_date': '20220504', + 'timestamp': 1651648042, + 'uploader_id': 'now', + 'view_count': int, + }, + 'params': { + 'noplaylist': True, + }, + }, { + 'url': 'https://now.naver.com/show/4759', + 'info_dict': { + 'id': '4759', + 'title': '아이키의 떰즈업', + }, + 'playlist_mincount': 48 + }, { + 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay', + 'info_dict': { + 'id': '4759', + 'title': '아이키의 떰즈업', + }, + 'playlist_mincount': 48, + }, { + 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=', + 'info_dict': { + 'id': '4759', + 'title': '아이키의 떰즈업', + }, + 'playlist_mincount': 48, + }] + + def _extract_replay(self, show_id, replay_id): + vod_info = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}', replay_id) + in_key = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}/inkey', replay_id)['inKey'] + return merge_dicts({ + 'id': f'{show_id}-{replay_id}', + 'title': traverse_obj(vod_info, ('episode', 'title')), + 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))), + 'thumbnail': vod_info.get('thumbnail_image_url'), + }, self._extract_video_info(replay_id, vod_info['video_id'], in_key)) + + def _extract_show_replays(self, show_id): + page = 0 + while True: + show_vod_info = self._download_json( + 
f'{self._API_URL}/vod-shows/{show_id}', show_id, + query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE}, + note=f'Downloading JSON vod list for show {show_id} - page {page}' + )['response']['result'] + for v in show_vod_info.get('vod_list') or []: + yield self._extract_replay(show_id, v['id']) + + if try_call(lambda: show_vod_info['count'] <= self._PAGE_SIZE * (page + 1)): + break + page += 1 + + def _extract_show_highlights(self, show_id, highlight_id=None): + page = 0 + while True: + highlights_videos = self._download_json( + f'{self._API_URL}/shows/{show_id}/highlights/videos/', show_id, + query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE}, + note=f'Downloading JSON highlights for show {show_id} - page {page}') + + for highlight in highlights_videos.get('results') or []: + if highlight_id and highlight.get('id') != int(highlight_id): + continue + yield merge_dicts({ + 'id': f'{show_id}-{highlight["id"]}', + 'title': highlight.get('title'), + 'timestamp': unified_timestamp(highlight.get('regdate')), + 'thumbnail': highlight.get('thumbnail_url'), + }, self._extract_video_info(highlight['id'], highlight['video_id'], highlight['video_inkey'])) + + if try_call(lambda: highlights_videos['count'] <= self._PAGE_SIZE * (page + 1)): + break + page += 1 + + def _extract_highlight(self, show_id, highlight_id): + try: + return next(self._extract_show_highlights(show_id, highlight_id)) + except StopIteration: + raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}') + + def _real_extract(self, url): + show_id = self._match_id(url) + qs = parse_qs(urlparse(url).query) + + if not self._yes_playlist(show_id, qs.get('shareHightlight')): + return self._extract_highlight(show_id, qs['shareHightlight'][0]) + elif not self._yes_playlist(show_id, qs.get('shareReplayId')): + return self._extract_replay(show_id, qs['shareReplayId'][0]) + + show_info = self._download_json( + f'{self._API_URL}/shows/{show_id}', show_id, + note=f'Downloading JSON vod list for show {show_id}') + + return self.playlist_result( + itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)), + show_id, show_info.get('title')) diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index de0142ccf..ad8dbd7a7 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -1,11 +1,15 @@ +import re + from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, + ExtractorError, int_or_none, - parse_duration, + merge_dicts, + parse_iso8601, qualities, try_get, - unified_strdate, urljoin, ) @@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = next(group for group in mobj.groups() if group) - id = mobj.group('id') webpage = self._download_webpage(url, display_id) - return self._extract_embed(webpage, display_id, id) + return self._extract_embed(webpage, display_id, url) class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html' + _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _TESTS = [{ + # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', + 'md5': '6515bc255dc5c5f8c85bbc38e035a659', 'info_dict': { 'id': 'hafengeburtstag988', + 'display_id': 
'Party-Poette-und-Parade', 'ext': 'mp4', 'title': 'Party, Pötte und Parade', - 'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg', 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c', - 'series': None, - 'channel': 'NDR Fernsehen', - 'upload_date': '20150508', + 'uploader': 'ndrtv', + 'timestamp': 1431255671, + 'upload_date': '20150510', 'duration': 3498, }, - }, { - 'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html', - 'only_matching': True - }, { - 'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html', - 'info_dict': { - 'id': 'kommunalwahl1296', - 'ext': 'mp4', - 'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik', - 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg', - 'description': 'md5:5c6e2ad744cef499135735a1036d7aa7', - 'series': 'Hallo Niedersachsen', - 'channel': 'NDR Fernsehen', - 'upload_date': '20210913', - 'duration': 438, + 'params': { + 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { - 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html', + # httpVideo, different content id + 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html', + 'md5': '1043ff203eab307f0c51702ec49e9a71', 'info_dict': { - 'id': 'sendung1091858', + 'id': 'osna272', + 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch', 'ext': 'mp4', - 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring', - 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg', - 'description': 'md5:700f6de264010585012a72f97b0ac0c9', - 'series': 'extra 3', - 'channel': 'NDR Fernsehen', - 'upload_date': '20201111', - 'duration': 1749, - } + 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights', + 'description': 'md5:32e9b800b3d2d4008103752682d5dc01', + 'uploader': 'ndrtv', + 'timestamp': 1442059200, + 'upload_date': '20150912', + 'duration': 510, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'No longer available', }, { + # httpAudio, same content id 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', + 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', 'info_dict': { 'id': 'audio51535', + 'display_id': 'La-Valette-entgeht-der-Hinrichtung', 'ext': 'mp3', 'title': 'La Valette entgeht der Hinrichtung', - 'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg', 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', - 'upload_date': '20140729', - 'duration': 884.0, + 'uploader': 'ndrinfo', + 'timestamp': 1631711863, + 'upload_date': '20210915', + 'duration': 884, }, - 'expected_warnings': ['unable to extract json url'], + 'params': { + 'skip_download': True, + }, + }, { + # with subtitles + 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html', + 'info_dict': { + 'id': 'extra18674', + 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring', + 'ext': 'mp4', + 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring', + 'description': 'md5:700f6de264010585012a72f97b0ac0c9', + 'uploader': 'ndrtv', + 'upload_date': '20201207', + 'timestamp': 1614349457, + 'duration': 1749, + 'subtitles': { + 'de': [{ + 'ext': 'ttml', + 'url': r're:^https://www\.ndr\.de.+', + }], + }, + }, + 'params': { + 
'skip_download': True,
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+        'only_matching': True,
     }]
 
-    def _extract_embed(self, webpage, display_id, id):
-        formats = []
-        base_url = 'https://www.ndr.de'
-        json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
-                                      'json url', fatal=False)
-        if json_url:
-            data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
-                                            id, fatal=False)
-            info_json = data_json.get('_info', {})
-            media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
-            for media in media_json:
-                if media.get('_quality') == 'auto':
-                    formats.extend(self._extract_m3u8_formats(media['_stream'], id))
-            subtitles = {}
-            sub_url = data_json.get('_subtitleUrl')
-            if sub_url:
-                subtitles.setdefault('de', []).append({
-                    'url': base_url + sub_url,
-                })
-            self._sort_formats(formats)
-            return {
-                'id': id,
-                'title': info_json.get('clipTitle'),
-                'thumbnail': base_url + data_json.get('_previewImage'),
-                'description': info_json.get('clipDescription'),
-                'series': info_json.get('seriesTitle') or None,
-                'channel': info_json.get('channelTitle'),
-                'upload_date': unified_strdate(info_json.get('clipDate')),
-                'duration': data_json.get('_duration'),
-                'formats': formats,
-                'subtitles': subtitles,
-            }
-        else:
-            json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
-                '_belongsToPodcast-', '')
-            data_json = self._download_json(json_url, id, fatal=False)
-            return {
-                'id': id,
-                'title': data_json.get('title'),
-                'thumbnail': base_url + data_json.get('poster'),
-                'description': data_json.get('summary'),
-                'upload_date': unified_strdate(data_json.get('publicationDate')),
-                'duration': parse_duration(data_json.get('duration')),
-                'formats': [{
-                    'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
-                    'vcodec': 'none',
-                    'ext': 'mp3',
-                }],
-            }
+    def _extract_embed(self, webpage, display_id, url):
+        embed_url = (
+            self._html_search_meta(
+                'embedURL', webpage, 'embed URL',
+                default=None)
+            or self._search_regex(
+                r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'embed URL', group='url', default=None)
+            or self._search_regex(
+                r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'embed URL', group='url', default=''))
+        # some more work needed if we only found sophoraID
+        if re.match(r'^[a-z]+\d+$', embed_url):
+            # get the initial part of the URL path, e.g. /panorama/archiv/2022/
+            parsed_url = compat_urllib_parse_urlparse(url)
+            path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+            # find tell-tale image with the actual ID
+            ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+            # or try to use special knowledge!
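+            # e.g. a bare sophoraID 'audio51535' maps to
+            # https://www.ndr.de/info/audio51535-player.html, which NDREmbedIE below handles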
+ NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html' + embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, ) + if not embed_url: + raise ExtractorError('Unable to extract embedUrl') + + description = self._search_regex( + r'<p[^>]+itemprop="description">([^<]+)</p>', + webpage, 'description', default=None) or self._og_search_description(webpage) + timestamp = parse_iso8601( + self._search_regex( + (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"', + r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ), + webpage, 'upload date', group='cont', default=None)) + info = self._search_json_ld(webpage, display_id, default={}) + return merge_dicts({ + '_type': 'url_transparent', + 'url': embed_url, + 'display_id': display_id, + 'description': description, + 'timestamp': timestamp, + }, info) class NJoyIE(NDRBaseIE): @@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'No longer available', }, { # httpVideo, different content id 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html', 'md5': '417660fffa90e6df2fda19f1b40a64d8', 'info_dict': { - 'id': 'dockville882', + 'id': 'livestream283', 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-', - 'ext': 'mp4', - 'title': '"Ich hab noch nie" mit Felix Jaehn', - 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3', + 'ext': 'mp3', + 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn', + 'description': 'md5:681698f527b8601e511e7b79edde7d2c', 'uploader': 'njoy', - 'upload_date': '20150822', - 'duration': 211, + 'upload_date': '20210830', }, 'params': { 'skip_download': True, @@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE): 'only_matching': True, }] - def _extract_embed(self, webpage, display_id, id): + def _extract_embed(self, webpage, display_id, url=None): + # find tell-tale URL with the actual ID, or ... 
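+        # ... fall back to the player iframe's pp_<id> (the second pattern below)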
video_id = self._search_regex( - r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') - description = self._search_regex( - r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', - webpage, 'description', fatal=False) + (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''', + r'<iframe[^>]+id="pp_([\da-z]+)"', ), + webpage, 'NDR id', default=None) + + description = ( + self._html_search_meta('description', webpage) + or self._search_regex( + r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', + webpage, 'description', fatal=False)) return { '_type': 'url_transparent', 'ie_key': 'NDREmbedBase', 'url': 'ndr:%s' % video_id, 'display_id': display_id, 'description': description, + 'title': display_id.replace('-', ' ').strip(), } @@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', @@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE): 'upload_date': '20150907', 'duration': 132, }, + 'skip': 'No longer available', }, { 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html', 'md5': '002085c44bae38802d94ae5802a36e78', @@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'No longer available', }, { 'url': 'http://www.ndr.de/info/audio51535-player.html', 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', @@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE): 'title': 'La Valette entgeht der Hinrichtung', 'is_live': False, 'uploader': 'ndrinfo', - 'upload_date': '20140729', + 'upload_date': '20210915', 'duration': 884, }, 'params': { @@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'No longer available', }, { # httpVideoLive 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html', 'info_dict': { 'id': 'livestream217', - 'ext': 'flv', + 'ext': 'mp4', 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, - 'upload_date': '20150910', + 'upload_date': '20210409', + 'uploader': 'ndrtv', }, 'params': { 'skip_download': True, @@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE): 'ext': 'mp4', 'title': 'Zehn Jahre Reeperbahn Festival - die Doku', 'is_live': False, - 'upload_date': '20150807', + 'upload_date': '20200826', 'duration': 1011, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { # httpAudio 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html', @@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'No longer available', }, { # httpAudioLive, no explicit ext 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html', @@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE): 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, 'uploader': 'njoy', - 'upload_date': '20150810', + 'upload_date': '20210830', }, 'params': { 'skip_download': True, diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py index fbb033169..bfe52f77d 100644 --- 
a/yt_dlp/extractor/ndtv.py +++ b/yt_dlp/extractor/ndtv.py @@ -1,13 +1,7 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote_plus -) -from ..utils import ( - parse_duration, - remove_end, - unified_strdate, - urljoin -) +from ..utils import parse_duration, remove_end, unified_strdate, urljoin class NDTVIE(InfoExtractor): @@ -80,7 +74,7 @@ class NDTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # '__title' does not contain extra words such as sub-site name, "Video" etc. - title = compat_urllib_parse_unquote_plus( + title = urllib.parse.unquote_plus( self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or self._og_search_title(webpage)) diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index ff9a2adf0..7057b8b26 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,14 +1,11 @@ import itertools import json import time -import urllib +import urllib.error +import urllib.parse -from ..utils import ( - ExtractorError, - parse_iso8601, - try_get, -) from .common import InfoExtractor +from ..utils import ExtractorError, parse_iso8601, try_get class NebulaBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 4def7e76b..f9a67876a 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,18 +1,12 @@ -from hashlib import md5 +import itertools +import re from base64 import b64encode from datetime import datetime -import re +from hashlib import md5 from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_str, - compat_itertools_count, -) -from ..utils import ( - sanitized_Request, - float_or_none, -) +from ..compat import compat_str, compat_urllib_parse_urlencode +from ..utils import float_or_none, sanitized_Request class NetEaseMusicBaseIE(InfoExtractor): @@ -449,7 +443,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): name = None desc = None entries = [] - for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE): + for offset in itertools.count(start=0, step=self._PAGE_SIZE): info = self.query_api( 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d' % (self._PAGE_SIZE, dj_id, offset), diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py new file mode 100644 index 000000000..f529682a3 --- /dev/null +++ b/yt_dlp/extractor/netverse.py @@ -0,0 +1,176 @@ +import functools + +from .common import InfoExtractor +from .dailymotion import DailymotionIE +from ..utils import ( + InAdvancePagedList, + smuggle_url, + traverse_obj, +) + + +class NetverseBaseIE(InfoExtractor): + _ENDPOINTS = { + 'watch': 'watchvideo', + 'video': 'watchvideo', + 'webseries': 'webseries', + } + + def _call_api(self, url, query={}): + display_id, sites_type = self._match_valid_url(url).group('display_id', 'type') + + json_data = self._download_json( + f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[sites_type]}/{display_id}', + display_id, query=query) + + return display_id, json_data + + +class NetverseIE(NetverseBaseIE): + _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)' + _TESTS = [{ + # Watch video + 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016', + 'info_dict': { + 'id': 'k4yhqUwINAGtmHx3NkL', + 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016', + 'ext': 'mp4', + 'season': 'Season 2016', + 
'description': 'md5:fc27747c0aa85067b6967c816f01617c', + 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfce45c827.jpeg', + 'episode_number': 22, + 'series': 'Waktu Indonesia Bercanda', + 'episode': 'Episode 22', + 'uploader_id': 'x2ir3vq', + 'age_limit': 0, + 'tags': [], + 'view_count': int, + 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016', + 'duration': 2990, + 'upload_date': '20210722', + 'timestamp': 1626919804, + 'like_count': int, + 'uploader': 'Net Prime', + } + }, { + # series + 'url': 'https://www.netverse.id/watch/jadoo-seorang-model', + 'info_dict': { + 'id': 'x88izwc', + 'title': 'Jadoo Seorang Model', + 'ext': 'mp4', + 'season': 'Season 2', + 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99', + 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg', + 'episode_number': 2, + 'series': 'Hello Jadoo', + 'episode': 'Episode 2', + 'view_count': int, + 'like_count': int, + 'display_id': 'jadoo-seorang-model', + 'uploader_id': 'x2ir3vq', + 'duration': 635, + 'timestamp': 1646372927, + 'tags': ['PG069497-hellojadooseason2eps2'], + 'upload_date': '20220304', + 'uploader': 'Net Prime', + 'age_limit': 0, + }, + 'skip': 'video get Geo-blocked for some country' + }, { + # non www host + 'url': 'https://netverse.id/watch/tetangga-baru', + 'info_dict': { + 'id': 'k4CNGz7V0HJ7vfwZbXy', + 'ext': 'mp4', + 'title': 'Tetangga Baru', + 'season': 'Season 1', + 'description': 'md5:ed6dd355bed84d139b1154c3d8d65957', + 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfd9d32c5f.jpeg', + 'episode_number': 1, + 'series': 'Tetangga Masa Gitu', + 'episode': 'Episode 1', + 'timestamp': 1624538169, + 'view_count': int, + 'upload_date': '20210624', + 'age_limit': 0, + 'uploader_id': 'x2ir3vq', + 'like_count': int, + 'uploader': 'Net Prime', + 'tags': ['PG008534', 'tetangga', 'Baru'], + 'display_id': 'tetangga-baru', + 'duration': 1406, + }, + }, { + # /video url + 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1', + 'title': 'Namaku Choi Jadoo', + 'info_dict': { + 'id': 'x887jzz', + 'ext': 'mp4', + 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg', + 'season': 'Season 1', + 'episode_number': 1, + 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99', + 'title': 'Namaku Choi Jadoo', + 'series': 'Hello Jadoo', + 'episode': 'Episode 1', + 'age_limit': 0, + 'like_count': int, + 'view_count': int, + 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'], + 'duration': 780, + 'display_id': 'pg067482-hellojadoo-season1', + 'uploader_id': 'x2ir3vq', + 'uploader': 'Net Prime', + 'timestamp': 1645764984, + 'upload_date': '20220225', + }, + 'skip': 'This video get Geo-blocked for some country' + }] + + def _real_extract(self, url): + display_id, program_json = self._call_api(url) + videos = program_json['response']['videos'] + + return { + '_type': 'url_transparent', + 'ie_key': DailymotionIE.ie_key(), + 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}), + 'display_id': display_id, + 'title': videos.get('title'), + 'season': videos.get('season_name'), + 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')), + 'description': traverse_obj(videos, ('program_detail', 'description')), + 'episode_number': videos.get('episode_order'), + 'series': traverse_obj(videos, ('program_detail', 
'title')), + } + + +class NetversePlaylistIE(NetverseBaseIE): + _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)' + _TEST = { + 'url': 'https://netverse.id/webseries/tetangga-masa-gitu', + 'info_dict': { + 'id': 'tetangga-masa-gitu', + 'title': 'Tetangga Masa Gitu', + }, + 'playlist_count': 46, + } + + def parse_playlist(self, url, page_num): + _, playlist_json = self._call_api(url, query={'page': page_num + 1}) + for slug in traverse_obj(playlist_json, ('response', 'related', 'data', ..., 'slug')): + yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE) + + def _real_extract(self, url): + _, playlist_data = self._call_api(url) + webseries_related_info = playlist_data['response']['related'] + # TODO: get video from other season + # The season has id and the next season video is located at api_url/<season_id>?page=<page> + return self.playlist_result( + InAdvancePagedList(functools.partial(self.parse_playlist, url), + webseries_related_info['last_page'], + webseries_related_info['to'] - webseries_related_info['from'] + 1), + traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')), + traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index cf2ec7b79..60d76d1b1 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -11,7 +11,7 @@ from ..utils import ( class NhkBaseIE(InfoExtractor): - _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand' _TYPE_REGEX = r'/(?P<type>video|audio)/' @@ -27,7 +27,7 @@ class NhkBaseIE(InfoExtractor): def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups() - if episode_id.isdigit(): + if len(episode_id) == 7: episode_id = episode_id[:4] + '-' + episode_id[4:] is_video = m_type == 'video' @@ -89,7 +89,8 @@ class NhkBaseIE(InfoExtractor): class NhkVodIE(NhkBaseIE): - _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) + # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg + _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. 
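     # Two ID forms are matched: 7-character codes such as 9999a34 (letters
     # allowed, per #29670) and slug-date IDs such as j_art-20150903-1.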
_TESTS = [{ @@ -129,6 +130,19 @@ }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', 'only_matching': True, + }, { + # video, alphabetic character in ID #29670 + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/', + 'info_dict': { + 'id': 'qfjay6cg', + 'ext': 'mp4', + 'title': 'DESIGN TALKS plus - Fishermen’s Finery', + 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448', + 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$', + 'upload_date': '20210615', + 'timestamp': 1623722008, + } }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index a80b544f8..82fb27631 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -647,14 +647,14 @@ class NiconicoSeriesIE(InfoExtractor): 'id': '110226', 'title': 'ご立派ァ!のシリーズ', }, - 'playlist_mincount': 10, # as of 2021/03/17 + 'playlist_mincount': 10, }, { 'url': 'https://www.nicovideo.jp/series/12312/', 'info_dict': { 'id': '12312', 'title': 'バトルスピリッツ お勧めカード紹介(調整中)', }, - 'playlist_mincount': 97, # as of 2021/03/17 + 'playlist_mincount': 103, }, { 'url': 'https://nico.ms/series/203559', 'only_matching': True, @@ -672,7 +672,7 @@ class NiconicoSeriesIE(InfoExtractor): title = unescapeHTML(title) playlist = [ self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id) - for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)] + for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)] return self.playlist_result(playlist, list_id, title) diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 6d93f154c..e677e862d 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, - url_or_none, -) +from ..utils import int_or_none, qualities, traverse_obj, url_or_none class NprIE(InfoExtractor): @@ -51,6 +47,15 @@ class NprIE(InfoExtractor): # multimedia, no formats, stream 'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert', 'only_matching': True, + }, { + 'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert', + 'info_dict': { + 'id': '1086468851', + 'ext': 'mp4', + 'title': 'Bonobo: Tiny Desk (Home) Concert', + 'duration': 1061, + 'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$', + }, + }] def _real_extract(self, url): @@ -110,6 +115,12 @@ class NprIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( stream_url, stream_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + if not formats: + raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False) + m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False) + formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False) + self._sort_formats(formats) entries.append({ diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 553c55132..fcbafe418 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -3,18 +3,17 @@ import random import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError, compat_str from ..utils import ( - compat_HTTPError, - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_duration, parse_iso8601, str_or_none, try_get, 
- urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 61e3a8b86..79dad09e3 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -9,7 +9,6 @@ from ..utils import ( ExtractorError, Popen, check_executable, - encodeArgument, get_exe_version, is_outdated_version, ) @@ -132,7 +131,7 @@ class PhantomJSwrapper: os.remove(self._TMP_FILES[name].name) def _save_cookies(self, url): - cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) + cookies = cookie_jar_to_list(self.extractor.cookiejar) for cookie in cookies: if 'path' not in cookie: cookie['path'] = '/' @@ -213,16 +212,14 @@ class PhantomJSwrapper: else: self.extractor.to_screen(f'{video_id}: {note2}') - p = Popen( + stdout, stderr, returncode = Popen.run( [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate_or_kill() - if p.returncode != 0: - raise ExtractorError( - 'Executing JS failed\n:' + encodeArgument(err)) + text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if returncode: + raise ExtractorError(f'Executing JS failed:\n{stderr}') with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') self._load_cookies() - return (html, encodeArgument(out)) + return html, stdout diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 8e50ffc7f..3fc05d1f2 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -1,11 +1,9 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urllib_parse, -) +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, float_or_none, @@ -125,7 +123,7 @@ class PelotonIE(InfoExtractor): is_live = False if ride_data.get('content_format') == 'audio': - url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token)) + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token)) formats = [{ 'url': url, 'ext': 'm4a', @@ -138,9 +136,9 @@ class PelotonIE(InfoExtractor): url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % ( ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), ride_data['vod_stream_url'], - compat_urllib_parse.quote(compat_urllib_parse.quote(token))) + urllib.parse.quote(urllib.parse.quote(token))) elif ride_data.get('live_stream_url'): - url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token)) + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token)) is_live = True else: raise ExtractorError('Missing video URL') diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py new file mode 100644 index 000000000..a635ac92f --- /dev/null +++ b/yt_dlp/extractor/playsuisse.py @@ -0,0 +1,147 @@ +import json + +from .common import InfoExtractor +from ..utils import int_or_none, traverse_obj + + +class PlaySuisseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/watch/(?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'https://www.playsuisse.ch/watch/763211/0', + 'md5': '82df2a470b2dfa60c2d33772a8a60cf8', + 'info_dict': { + 'id': '763211', + 'ext': 'mp4', + 'title': 'Knochen', + 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8', + 'duration': 3344, + 
'series': 'Wilder', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Knochen', + 'episode_number': 1, + 'thumbnail': 'md5:9260abe0c0ec9b69914d0a10d54c5878' + } + }, + { + 'url': 'https://www.playsuisse.ch/watch/808675/0', + 'md5': '818b94c1d2d7c4beef953f12cb8f3e75', + 'info_dict': { + 'id': '808675', + 'ext': 'mp4', + 'title': 'Der Läufer', + 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd', + 'duration': 5280, + 'episode': 'Der Läufer', + 'thumbnail': 'md5:44af7d65ee02bbba4576b131868bb783' + } + }, + { + 'url': 'https://www.playsuisse.ch/watch/817193/0', + 'md5': '1d6c066f92cd7fffd8b28a53526d6b59', + 'info_dict': { + 'id': '817193', + 'ext': 'mp4', + 'title': 'Die Einweihungsparty', + 'description': 'md5:91ebf04d3a42cb3ab70666acf750a930', + 'duration': 1380, + 'series': 'Nr. 47', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Die Einweihungsparty', + 'episode_number': 1, + 'thumbnail': 'md5:637585fb106e3a4bcd991958924c7e44' + } + } + ] + + _GRAPHQL_QUERY = ''' + query AssetWatch($assetId: ID!) { + assetV2(id: $assetId) { + ...Asset + episodes { + ...Asset + } + } + } + fragment Asset on AssetV2 { + id + name + description + duration + episodeNumber + seasonNumber + seriesName + medias { + type + url + } + thumbnail16x9 { + ...ImageDetails + } + thumbnail2x3 { + ...ImageDetails + } + thumbnail16x9WithTitle { + ...ImageDetails + } + thumbnail2x3WithTitle { + ...ImageDetails + } + } + fragment ImageDetails on AssetImage { + id + url + }''' + + def _get_media_data(self, media_id): + # NOTE: In the web app, the "locale" header is used to switch between languages; + # however, this doesn't seem to take effect when passing the header here. + response = self._download_json( + 'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql', + media_id, data=json.dumps({ + 'operationName': 'AssetWatch', + 'query': self._GRAPHQL_QUERY, + 'variables': {'assetId': media_id} + }).encode('utf-8'), + headers={'Content-Type': 'application/json', 'locale': 'de'}) + + return response['data']['assetV2'] + + def _real_extract(self, url): + media_id = self._match_id(url) + media_data = self._get_media_data(media_id) + info = self._extract_single(media_data) + if media_data.get('episodes'): + info.update({ + '_type': 'playlist', + 'entries': map(self._extract_single, media_data['episodes']), + }) + return info + + def _extract_single(self, media_data): + thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail')) + + formats, subtitles = [], {} + for media in traverse_obj(media_data, 'medias', default=[]): + if not media.get('url') or media.get('type') != 'HLS': + continue + f, subs = self._extract_m3u8_formats_and_subtitles( + media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False) + formats.extend(f) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': media_data['id'], + 'title': media_data.get('name'), + 'description': media_data.get('description'), + 'thumbnails': thumbnails, + 'duration': int_or_none(media_data.get('duration')), + 'formats': formats, + 'subtitles': subtitles, + 'series': media_data.get('seriesName'), + 'season_number': int_or_none(media_data.get('seasonNumber')), + 'episode': media_data.get('name'), + 'episode_number': int_or_none(media_data.get('episodeNumber')), + } diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py index 5ffefc934..18aeda7de 100644 --- a/yt_dlp/extractor/playvid.py +++ b/yt_dlp/extractor/playvid.py @@ -1,14 +1,9 @@ import re +import urllib.parse from .common import 
InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, -) -from ..utils import ( - clean_html, - ExtractorError, -) +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError, clean_html class PlayvidIE(InfoExtractor): @@ -62,7 +57,7 @@ class PlayvidIE(InfoExtractor): val = videovars_match.group(2) if key == 'title': - video_title = compat_urllib_parse_unquote_plus(val) + video_title = urllib.parse.unquote_plus(val) if key == 'duration': try: duration = int(val) diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index eef0d02ca..0911893d4 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,5 +1,3 @@ -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -136,42 +134,3 @@ class PokemonWatchIE(InfoExtractor): 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episode')), }) - - -class PokemonSoundLibraryIE(InfoExtractor): - _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp' - - _TESTS = [{ - 'url': 'https://soundlibrary.pokemon.co.jp/', - 'info_dict': { - 'title': 'Pokémon Diamond and Pearl Sound Tracks', - }, - 'playlist_mincount': 149, - }] - - def _real_extract(self, url): - musicbox_webpage = self._download_webpage( - 'https://soundlibrary.pokemon.co.jp/musicbox', None, - 'Downloading list of songs') - song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)] - song_titles = song_titles[4::2] - - # each songs don't have permalink; instead we return all songs at once - song_entries = [{ - 'id': f'pokemon-soundlibrary-{song_id}', - 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav', - # note: the server always serves MP3 files, despite its extension of the URL above - 'ext': 'mp3', - 'acodec': 'mp3', - 'vcodec': 'none', - 'title': song_title, - 'track': song_title, - 'artist': 'Nintendo / Creatures Inc. 
/ GAME FREAK inc.', - 'uploader': 'Pokémon', - 'release_year': 2006, - 'release_date': '20060928', - 'track_number': song_id, - 'album': 'Pokémon Diamond and Pearl', - } for song_id, song_title in enumerate(song_titles, 1)] - - return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks') diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py index ed741a07b..ddc5ec8c8 100644 --- a/yt_dlp/extractor/popcorntimes.py +++ b/yt_dlp/extractor/popcorntimes.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_chr, -) +from ..compat import compat_b64decode from ..utils import int_or_none @@ -50,7 +47,7 @@ class PopcorntimesIE(InfoExtractor): c_ord += 13 if upper < c_ord: c_ord -= 26 - loc_b64 += compat_chr(c_ord) + loc_b64 += chr(c_ord) video_url = compat_b64decode(loc_b64).decode('utf-8') diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index d296ccacb..35468b4fc 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -3,28 +3,26 @@ import itertools import math import operator import re +import urllib.request from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_request, -) from .openload import PhantomJSwrapper +from ..compat import compat_HTTPError, compat_str from ..utils import ( + NO_DEFAULT, + ExtractorError, clean_html, determine_ext, - ExtractorError, format_field, int_or_none, merge_dicts, - NO_DEFAULT, orderedSet, remove_quotes, + remove_start, str_to_int, update_url_query, - urlencode_postdata, url_or_none, + urlencode_postdata, ) @@ -49,7 +47,7 @@ class PornHubBaseIE(InfoExtractor): r'document\.location\.reload\(true\)')): url_or_request = args[0] url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) + if isinstance(url_or_request, urllib.request.Request) else url_or_request) phantom = PhantomJSwrapper(self, required_version='2.0') phantom.get(url, html=webpage) @@ -199,6 +197,16 @@ class PornHubIE(PornHubBaseIE): }, 'skip': 'This video has been disabled', }, { + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a', + 'info_dict': { + 'id': 'ph601dc30bae19a', + 'uploader': 'Projekt Melody', + 'uploader_id': 'projekt-melody', + 'upload_date': '20210205', + 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)', + 'thumbnail': r're:https?://.+', + }, + }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, }, { @@ -429,7 +437,7 @@ class PornHubIE(PornHubBaseIE): default=None)) formats.append({ 'url': format_url, - 'format_id': format_field(height, template='%dp'), + 'format_id': format_field(height, None, '%dp'), 'height': height, }) @@ -457,9 +465,11 @@ class PornHubIE(PornHubBaseIE): self._sort_formats( formats, field_preference=('height', 'width', 'fps', 'format_id')) + model_profile = self._search_json( + r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) or {} video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', default=None) + webpage, 'uploader', default=None) or model_profile.get('username') def extract_vote_count(kind, name): return self._extract_count( @@ -488,6 +498,7 @@ class PornHubIE(PornHubBaseIE): return merge_dicts({ 'id': video_id, 'uploader': video_uploader, + 'uploader_id': 
remove_start(model_profile.get('modelProfileLink'), '/model/'), 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py new file mode 100644 index 000000000..67d41fdfd --- /dev/null +++ b/yt_dlp/extractor/premiershiprugby.py @@ -0,0 +1,39 @@ +from .common import InfoExtractor +from ..utils import int_or_none, traverse_obj + + +class PremiershipRugbyIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons', + 'info_dict': { + 'id': '0_mbkb7ldt', + 'title': 'Full Match: Harlequins v Newcastle Falcons', + 'ext': 'mp4', + 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75', + 'duration': 6093.0, + 'tags': ['video'], + 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'], + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + json_data = self._download_json( + f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}', + display_id, query={'clientId': 'PRL'})['data']['article'] + + formats, subs = self._extract_m3u8_formats_and_subtitles( + json_data['heroMedia']['content']['videoLink'], display_id) + + return { + 'id': json_data['heroMedia']['content']['sourceSystemId'], + 'display_id': display_id, + 'title': traverse_obj(json_data, ('heroMedia', 'title')), + 'formats': formats, + 'subtitles': subs, + 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')), + 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000), + 'tags': json_data.get('tags'), + 'categories': traverse_obj(json_data, ('categories', ..., 'text')), + } diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py index 3c13d1f56..38c5d1109 100644 --- a/yt_dlp/extractor/puls4.py +++ b/yt_dlp/extractor/puls4.py @@ -1,9 +1,6 @@ from .prosiebensat1 import ProSiebenSat1BaseIE -from ..utils import ( - unified_strdate, - parse_duration, - compat_str, -) +from ..compat import compat_str +from ..utils import parse_duration, unified_strdate class Puls4IE(ProSiebenSat1BaseIE): diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index dbb748715..498cc6be9 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -43,7 +43,7 @@ class RadikoBaseIE(InfoExtractor): }).split(',')[0] auth_data = (auth_token, area_id) - self._downloader.cache.store('radiko', 'auth_data', auth_data) + self.cache.store('radiko', 'auth_data', auth_data) return auth_data def _extract_full_key(self): @@ -150,7 +150,7 @@ class RadikoIE(RadikoBaseIE): vid_int = unified_timestamp(video_id, False) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - auth_cache = self._downloader.cache.load('radiko', 'auth_data') + auth_cache = self.cache.load('radiko', 'auth_data') for attempt in range(2): auth_token, area_id = (not attempt and auth_cache) or self._auth_client() formats = self._extract_formats( diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 8fef54dab..7b60b2617 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -1,6 +1,7 @@ import re from .common import InfoExtractor +from ..utils import parse_duration, unified_strdate 
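# The FranceCultureIE added below reads its metadata out of an embedded AudioObject JSON-LD blob. # A minimal sketch of the assumed shape (hypothetical values; only the fields the extractor consumes): # {"@type": "AudioObject", "contentUrl": "https://.../episode.mp3", "encodingFormat": "mp3", # "duration": "PT45M50S", "datePublished": "2022-05-14"}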
class RadioFranceIE(InfoExtractor): @@ -54,3 +55,51 @@ class RadioFranceIE(InfoExtractor): 'description': description, 'uploader': uploader, } + + +class FranceCultureIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])' + _TESTS = [ + { + 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', + 'info_dict': { + 'id': '8440487', + 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau', + 'ext': 'mp3', + 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?', + 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?', + 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg', + 'upload_date': '20220514', + 'duration': 2750, + }, + }, + { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + webpage = self._download_webpage(url, display_id) + + # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846 + video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+') + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_data['contentUrl'], + 'ext': video_data.get('encodingFormat'), + 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, + 'duration': parse_duration(video_data.get('duration')), + 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', + webpage, 'title', default=self._og_search_title(webpage)), + 'description': self._html_search_regex( + r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': self._html_search_regex( + r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None), + 'upload_date': unified_strdate(self._search_regex( + r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) + } diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index dc9897305..d89c9563b 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -80,7 +80,7 @@ class RadLiveIE(InfoExtractor): 'release_timestamp': release_date, 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'), + 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'), } if content_type == 'episode': diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index ad53d697e..119c5ea3c 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -146,7 +146,7 @@ class RokfinIE(InfoExtractor): for page_n in itertools.count(): raw_comments = self._download_json( f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50', - video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}', + video_id, note=f'Downloading viewer comments 
page {page_n + 1}{format_field(pages_total, None, " of %s")}', fatal=False) or {} for comment in raw_comments.get('content') or []: @@ -318,7 +318,7 @@ class RokfinChannelIE(RokfinPlaylistBaseIE): data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}' metadata = self._download_json( data_url, channel_name, - note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}') + note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}') yield from self._get_video_data(metadata) pages_total = int_or_none(metadata.get('totalPages')) or None @@ -360,7 +360,7 @@ class RokfinSearchIE(SearchInfoExtractor): _db_access_key = None def _real_initialize(self): - self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None)) + self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None)) if not self._db_url: self._get_db_access_credentials() @@ -369,7 +369,7 @@ class RokfinSearchIE(SearchInfoExtractor): for page_number in itertools.count(1): search_results = self._run_search_query( query, data={'query': query, 'page': {'size': 100, 'current': page_number}}, - note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}') + note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}') total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none) for result in search_results.get('results') or []: @@ -405,6 +405,6 @@ class RokfinSearchIE(SearchInfoExtractor): self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json') self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}' - self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) + self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) return raise ExtractorError('Unable to extract access credentials') diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 42a602968..798dde7fa 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -1,14 +1,12 @@ import base64 import io +import struct from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_struct_unpack, -) +from ..compat import compat_b64decode from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, qualities, remove_end, @@ -73,7 +71,7 @@ class RTVEALaCartaIE(InfoExtractor): def _decrypt_url(png): encrypted_data = io.BytesIO(compat_b64decode(png)[8:]) while True: - length = compat_struct_unpack('!I', encrypted_data.read(4))[0] + length = struct.unpack('!I', encrypted_data.read(4))[0] chunk_type = encrypted_data.read(4) if chunk_type == b'IEND': break diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 50c383d79..924f9829f 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -24,6 +24,11 @@ class RumbleEmbedIE(InfoExtractor): 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', 'timestamp': 1571611968, 'upload_date': '20191020', + 'channel_url': 'https://rumble.com/c/WMAR', + 'channel': 'WMAR', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg', + 'duration': 234, + 'uploader': 'WMAR', } }, { 'url': 'https://rumble.com/embed/vslb7v', @@ -38,19 +43,21 @@ class RumbleEmbedIE(InfoExtractor): 'channel': 'CTNews', 'thumbnail': 
'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'duration': 901, + 'uploader': 'CTNews', } }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, }] - @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL, - webpage)] + @classmethod + def _extract_urls(cls, webpage): + embeds = tuple(re.finditer( + fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{cls._VALID_URL})', webpage)) + if embeds: + return [mobj.group('url') for mobj in embeds] + return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( + r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] def _real_extract(self, url): video_id = self._match_id(url) @@ -77,17 +84,26 @@ class RumbleEmbedIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + subtitles = { + lang: [{ + 'url': sub_info['path'], + 'name': sub_info.get('language') or '', + }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path') + } + author = video.get('author') or {} return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'thumbnail': video.get('i'), 'timestamp': parse_iso8601(video.get('pubDate')), 'channel': author.get('name'), 'channel_url': author.get('url'), 'duration': int_or_none(video.get('duration')), + 'uploader': author.get('name'), } diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py index e3dbaab69..df5e79bef 100644 --- a/yt_dlp/extractor/screencast.py +++ b/yt_dlp/extractor/screencast.py @@ -1,11 +1,8 @@ +import urllib.request + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_request, -) -from ..utils import ( - ExtractorError, -) +from ..compat import compat_parse_qs +from ..utils import ExtractorError class ScreencastIE(InfoExtractor): @@ -75,7 +72,7 @@ class ScreencastIE(InfoExtractor): flash_vars_s = flash_vars_s.replace(',', '&') if flash_vars_s: flash_vars = compat_parse_qs(flash_vars_s) - video_url_raw = compat_urllib_request.quote( + video_url_raw = urllib.request.quote( flash_vars['content'][0]) video_url = video_url_raw.replace('http%3A', 'http:') diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py index 5bc097b0d..9a237b320 100644 --- a/yt_dlp/extractor/shared.py +++ b/yt_dlp/extractor/shared.py @@ -1,14 +1,13 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote_plus, -) +from ..compat import compat_b64decode from ..utils import ( - determine_ext, + KNOWN_EXTENSIONS, ExtractorError, + determine_ext, int_or_none, js_to_json, - KNOWN_EXTENSIONS, parse_filesize, rot47, url_or_none, @@ -130,7 +129,7 @@ class VivoIE(SharedBaseIE): return stream_url def decode_url(encoded_url): - return rot47(compat_urllib_parse_unquote_plus(encoded_url)) + return rot47(urllib.parse.unquote_plus(encoded_url)) return decode_url(self._parse_json( self._search_regex( diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 6dfa50c60..9e4c8cf25 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -67,7 +67,7 @@ class SoundcloudBaseIE(InfoExtractor): _HEADERS = {} def _store_client_id(self, client_id): - self._downloader.cache.store('soundcloud', 'client_id', client_id) + self.cache.store('soundcloud', 
'client_id', client_id) def _update_client_id(self): webpage = self._download_webpage('https://soundcloud.com/', None) @@ -104,7 +104,7 @@ class SoundcloudBaseIE(InfoExtractor): raise def _initialize_pre_login(self): - self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' + self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' def _perform_login(self, username, password): if username != 'oauth': diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 855f1d6d3..7381ac362 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -109,6 +109,49 @@ class SouthParkDeIE(SouthParkIE): return +class SouthParkLatIE(SouthParkIE): + IE_NAME = 'southpark.lat' + _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman', + 'only_matching': True, + }, { + 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7', + 'only_matching': True, + }, { + 'url': 'https://www.southpark.lat/en/collections/29ve08/south-park-heating-up/lydbrc', + 'only_matching': True, + }, { + # clip + 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman', + 'info_dict': { + 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 'title': 'Tooth Fairy Cartman', + 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68', + }, + }, { + # episode + 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7', + 'info_dict': { + 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285', + 'ext': 'mp4', + 'title': 'South Park', + 'description': 'md5:ae0d875eff169dcbed16b21531857ac1', + }, + }] + + def _get_feed_url(self, uri, url=None): + video_id = self._id_from_uri(uri) + config = self._download_json( + f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', + video_id) + return self._remove_template_parameter(config['feedWithQueryParams']) + + def _get_feed_query(self, uri): + return + + class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index a2068a1b6..fef8d8dd2 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -1,12 +1,15 @@ +import functools import json import re from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, clean_podcast_url, float_or_none, int_or_none, strip_or_none, + traverse_obj, try_get, unified_strdate, ) @@ -25,7 +28,7 @@ class SpotifyBaseIE(InfoExtractor): self._ACCESS_TOKEN = self._download_json( 'https://open.spotify.com/get_access_token', None)['accessToken'] - def _call_api(self, operation, video_id, variables): + def _call_api(self, operation, video_id, variables, **kwargs): return self._download_json( 'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={ 'operationName': 'query' + operation, @@ -35,7 +38,8 @@ class SpotifyBaseIE(InfoExtractor): 'sha256Hash': self._OPERATION_HASHES[operation], }, }) - }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data'] + }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN}, + **kwargs)['data'] def _extract_episode(self, episode, series): 
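# `series` is the show name; SpotifyShowIE below now supplies episodes lazily via # functools.partial(self._extract_episode, series=podcast.get('name')), paging through an # OnDemandPagedList of 100 episodes per request instead of a single limit=1000000000 call.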
episode_id = episode['id'] @@ -143,22 +147,25 @@ class SpotifyShowIE(SpotifyBaseIE): }, 'playlist_mincount': 36, } + _PER_PAGE = 100 + + def _fetch_page(self, show_id, page=0): + return self._call_api('ShowEpisodes', show_id, { + 'limit': 100, + 'offset': page * self._PER_PAGE, + 'uri': f'spotify:show:{show_id}', + }, note=f'Downloading page {page + 1} JSON metadata')['podcast'] def _real_extract(self, url): show_id = self._match_id(url) - podcast = self._call_api('ShowEpisodes', show_id, { - 'limit': 1000000000, - 'offset': 0, - 'uri': 'spotify:show:' + show_id, - })['podcast'] - podcast_name = podcast.get('name') - - entries = [] - for item in (try_get(podcast, lambda x: x['episodes']['items']) or []): - episode = item.get('episode') - if not episode: - continue - entries.append(self._extract_episode(episode, podcast_name)) + first_page = self._fetch_page(show_id) + + def _entries(page): + podcast = self._fetch_page(show_id, page) if page else first_page + yield from map( + functools.partial(self._extract_episode, series=podcast.get('name')), + traverse_obj(podcast, ('episodes', 'items', ..., 'episode'))) return self.playlist_result( - entries, show_id, podcast_name, podcast.get('description')) + OnDemandPagedList(_entries, self._PER_PAGE), + show_id, first_page.get('name'), first_page.get('description')) diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 716190220..035747c31 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -44,7 +44,7 @@ class StoryFireBaseIE(InfoExtractor): 'timestamp': int_or_none(video.get('publishDate')), 'uploader': video.get('username'), 'uploader_id': uploader_id, - 'uploader_url': format_field(uploader_id, template='https://storyfire.com/user/%s/video'), + 'uploader_url': format_field(uploader_id, None, 'https://storyfire.com/user/%s/video'), 'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')), } diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py index 85fc3a3c3..849a9882d 100644 --- a/yt_dlp/extractor/streamcz.py +++ b/yt_dlp/extractor/streamcz.py @@ -52,8 +52,8 @@ class StreamCZIE(InfoExtractor): def _extract_formats(self, spl_url, video): for ext, pref, streams in ( - ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))), - ('mp4', 1, video.get('mp4'))): + ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}), + ('mp4', 1, video.get('mp4') or {})): for format_id, stream in streams.items(): if not stream.get('url'): continue diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index 618dc4329..c879fb52e 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -1,6 +1,6 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - compat_str, float_or_none, int_or_none, smuggle_url, diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py new file mode 100644 index 000000000..70cf10515 --- /dev/null +++ b/yt_dlp/extractor/substack.py @@ -0,0 +1,100 @@ +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import str_or_none, traverse_obj + + +class SubstackIE(InfoExtractor): + _VALID_URL = r'https?://(?P<username>[\w-]+)\.substack\.com/p/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://haleynahman.substack.com/p/i-made-a-vlog?s=r', + 'md5': 'f27e4fc6252001d48d479f45e65cdfd5', + 'info_dict': { + 'id': '47660949', + 'ext': 'mp4', + 'title': 'I MADE A VLOG', + 'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6', + 
'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18', + 'uploader': 'Maybe Baby', + 'uploader_id': '33628', + } + }, { + 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r', + 'md5': '0a63eacec877a1171a62cfa69710fcea', + 'info_dict': { + 'id': '51045592', + 'ext': 'mpga', + 'title': "🎧 Dear Danny: I found my boyfriend's secret Twitter account", + 'description': 'md5:a57f2439319e56e0af92dd0c95d75797', + 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639', + 'uploader': 'Maybe Baby', + 'uploader_id': '33628', + } + }, { + 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe', + 'md5': 'fd3c07077b02444ff0130715b5f632bb', + 'info_dict': { + 'id': '47368578', + 'ext': 'mp4', + 'title': 'Mussels with Black Bean Sauce: Recipe of the Week #7', + 'description': 'md5:b96234a2906c7d854d5229818d889515', + 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232', + 'uploader': "Andrew Zimmern's Spilled Milk ", + 'uploader_id': '577659', + } + }] + + @classmethod + def _extract_url(cls, webpage, url): + if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage): + return + + mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage) + if mobj: + parsed = urllib.parse.urlparse(url) + return parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() + + def _extract_video_formats(self, video_id, username): + formats, subtitles = [], {} + for video_format in ('hls', 'mp4'): + video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}' + + if video_format == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': video_url, + 'ext': video_format, + }) + + return formats, subtitles + + def _real_extract(self, url): + display_id, username = self._match_valid_url(url).group('id', 'username') + webpage = self._download_webpage(url, display_id) + + webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id) + + post_type = webpage_info['post']['type'] + formats, subtitles = [], {} + if post_type == 'podcast': + formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} + elif post_type == 'video': + formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username) + else: + self.raise_no_formats(f'Page type "{post_type}" is not supported') + + self._sort_formats(formats) + return { + 'id': str(webpage_info['post']['id']), + 'formats': formats, + 'subtitles': subtitles, + 'title': traverse_obj(webpage_info, ('post', 'title')), + 'description': traverse_obj(webpage_info, ('post', 'description')), + 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')), + 'uploader': traverse_obj(webpage_info, ('pub', 'name')), + 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))), + } diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index 80acaf190..3bd7ce3c4 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -1,16 +1,17 @@ -import json +import urllib.parse from .common import InfoExtractor - from ..utils import ( ExtractorError, + random_uuidv4, unified_timestamp, + urlencode_postdata, ) class TennisTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)' - _TEST = { + _TESTS = [{ 
'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz', 'info_dict': { 'id': 'indian-wells-2018-verdasco-fritz', @@ -25,86 +26,132 @@ class TennisTVIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Requires email and password of a subscribed account', - } + }, { + 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5', + 'info_dict': { + 'id': '2650480', + 'ext': 'mp4', + 'title': 'Best Matches of 2022 - Part 5', + 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c', + 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'Requires email and password of a subscribed account', + }] _NETRC_MACHINE = 'tennistv' - _session_token = None - - def _perform_login(self, username, password): - login_form = { - 'Email': username, - 'Password': password, - } - login_json = json.dumps(login_form).encode('utf-8') - headers = { - 'content-type': 'application/json', - 'Referer': 'https://www.tennistv.com/login', - 'Origin': 'https://www.tennistv.com', - } - - login_result = self._download_json( - 'https://www.tennistv.com/api/users/v1/login', None, - note='Logging in', - errnote='Login failed (wrong password?)', - headers=headers, - data=login_json) + access_token, refresh_token = None, None + _PARTNER_ID = 3001482 + _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}' + _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect' + _HEADERS = { + 'origin': 'https://www.tennistv.com', + 'referer': 'https://www.tennistv.com/', + 'content-Type': 'application/x-www-form-urlencoded' + } - if login_result['error']['errorCode']: - raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage'])) + def _perform_login(self, username, password): + login_page = self._download_webpage( + f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page', + query={ + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://tennistv.com', + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid' + }) + + post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url') + temp_page = self._download_webpage( + post_url, None, 'Sending login data', 'Unable to send login data', + headers=self._HEADERS, data=urlencode_postdata({ + 'username': username, + 'password': password, + 'submitAction': 'Log In' + })) + if 'Your username or password was incorrect' in temp_page: + raise ExtractorError('Your username or password was incorrect', expected=True) + + handle = self._request_webpage( + f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS, + query={ + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html', + 'state': random_uuidv4(), + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid', + 'nonce': random_uuidv4(), + 'prompt': 'none' + }) + + self.get_token(None, { + 'code': urllib.parse.parse_qs(handle.geturl())['code'][-1], + 'grant_type': 'authorization_code', + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html' + }) + + def get_token(self, video_id, payload): + res = self._download_json( + f'{self._AUTH_BASE_URL}/token', video_id, 
'Fetching tokens', + 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload)) + + self.access_token = res.get('access_token') or self.access_token + self.refresh_token = res.get('refresh_token') or self.refresh_token - if login_result['entitlement'] != 'SUBSCRIBED': - self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME)) + def _real_initialize(self): + if self.access_token and self.refresh_token: + return - self._session_token = login_result['sessionToken'] + cookies = self._get_cookies('https://www.tennistv.com/') + if not cookies.get('access_token') or not cookies.get('refresh_token'): + self.raise_login_required() + self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value - def _real_initialize(self): - if not self._session_token: - raise self.raise_login_required('Login info is needed for this website', method='password') + def _download_session_json(self, video_id, entryid,): + return self._download_json( + f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}', + video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS) def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id') + entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID') + session_json = self._download_session_json(video_id, entryid) - headers = { - 'Origin': 'https://www.tennistv.com', - 'authorization': 'ATP %s' % self._session_token, - 'content-type': 'application/json', - 'Referer': url, - } - check_data = { - 'videoID': internal_id, - 'VideoUrlType': 'HLS', - } - check_json = json.dumps(check_data).encode('utf-8') - check_result = self._download_json( - 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva', - video_id, note='Checking video authorization', headers=headers, data=check_json) - formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4') - self._sort_formats(formats) + k_session = session_json.get('KSession') + if k_session is None: + self.get_token(video_id, { + 'grant_type': 'refresh_token', + 'refresh_token': self.refresh_token, + 'client_id': 'tennis-tv-web' + }) + k_session = self._download_session_json(video_id, entryid).get('KSession') + if k_session is None: + raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True) - vdata = self._download_json( - 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id, - video_id, headers=headers) + if session_json.get('ErrorMessage'): + self.report_warning(session_json['ErrorMessage']) - timestamp = unified_timestamp(vdata['timestamp']) - thumbnail = vdata['video']['thumbnailUrl'] - description = vdata['displayText']['description'] - title = vdata['video']['title'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id) - series = vdata['tour'] - venue = vdata['displayText']['venue'] - round_str = vdata['seo']['round'] + self._sort_formats(formats) return { 'id': video_id, - 'title': title, - 'description': description, + 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'description': self._html_search_regex( + (r'<span itemprop="description" content=["\']([^"\']+)["\']>', 
*self._og_regexes('description')), + webpage, 'description', fatal=False), + 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920', + 'timestamp': unified_timestamp(self._html_search_regex( + r'<span itemprop="description" content=["\']([^"\']+)["\']>', webpage, 'upload time')), + 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None, + 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None, + 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None, 'formats': formats, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'series': series, - 'season': venue, - 'episode': round_str, + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 32cae429e..d205fe053 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -11,7 +11,7 @@ class TestURLIE(InfoExtractor): _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$' def _real_extract(self, url): - from ..extractor import gen_extractor_classes + from . import gen_extractor_classes extractor_id, num = self._match_valid_url(url).group('extractor', 'num') diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 4ba993582..680358d5e 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,28 +1,27 @@ import itertools +import json import random +import re import string import time -import json from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse -) +from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..utils import ( ExtractorError, HEADRequest, + LazyList, UnsupportedError, + get_element_by_id, get_first, int_or_none, join_nonempty, - LazyList, + qualities, srt_subtitles_timecode, str_or_none, traverse_obj, try_get, url_or_none, - qualities, ) @@ -35,6 +34,21 @@ class TikTokBaseIE(InfoExtractor): _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') + _session_initialized = False + + @staticmethod + def _create_url(user_id, video_id): + return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}' + + def _get_sigi_state(self, webpage, display_id): + return self._parse_json(get_element_by_id( + 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id) + + def _real_initialize(self): + if self._session_initialized: + return + self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False) + TikTokBaseIE._session_initialized = True def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): @@ -261,6 +275,9 @@ class TikTokBaseIE(InfoExtractor): return { 'id': aweme_id, + 'extractor_key': TikTokIE.ie_key(), + 'extractor': TikTokIE.IE_NAME, + 'webpage_url': self._create_url(author_info.get('uid'), aweme_id), 'title': aweme_detail.get('desc'), 'description': aweme_detail.get('desc'), 'view_count': int_or_none(stats_info.get('play_count')), @@ -361,7 +378,7 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)' + _VALID_URL = 
r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', @@ -459,14 +476,14 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available'] + 'expected_warnings': ['trying with webpage', 'Unable to find video in feed'] }, { # Video without title and description 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694', 'info_dict': { 'id': '7059698374567611694', 'ext': 'mp4', - 'title': 'tiktok video #7059698374567611694', + 'title': 'TikTok video #7059698374567611694', 'description': '', 'uploader': 'pokemonlife22', 'creator': 'Pokemon', @@ -483,13 +500,40 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available', 'Creating a generic title'] + }, { + # hydration JSON is sent in a <script> element + 'url': 'https://www.tiktok.com/@denidil6/video/7065799023130643713', + 'info_dict': { + 'id': '7065799023130643713', + 'ext': 'mp4', + 'title': '#denidil#денидил', + 'description': '#denidil#денидил', + 'uploader': 'denidil6', + 'uploader_id': '7046664115636405250', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAsvMSzFdQ4ikl3uR2TEJwMBbB2yZh2Zxwhx-WCo3rbDpAharE3GQCrFuJArI3C8QJ', + 'artist': 'Holocron Music', + 'album': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night', + 'track': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night', + 'timestamp': 1645134536, + 'duration': 26, + 'upload_date': '20220217', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + 'expected_warnings': ['trying feed workaround', 'Unable to find video in feed'] }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', 'only_matching': True }] + @classmethod + def _extract_urls(cls, webpage): + return [mobj.group('url') for mobj in re.finditer( + rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)] + def _extract_aweme_app(self, aweme_id): try: aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, @@ -497,7 +541,7 @@ class TikTokIE(TikTokBaseIE): if not aweme_detail: raise ExtractorError('Video not available', video_id=aweme_id) except ExtractorError as e: - self.report_warning(f'{e}; Retrying with feed workaround') + self.report_warning(f'{e.orig_msg}; trying feed workaround') feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or [] aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) @@ -506,26 +550,20 @@ class TikTokIE(TikTokBaseIE): return self._parse_aweme_video_app(aweme_detail) def _real_extract(self, url): - video_id = self._match_id(url) - + video_id, user_id = self._match_valid_url(url).group('id', 'user_id') try: return self._extract_aweme_app(video_id) except ExtractorError as e: - self.report_warning(f'{e}; Retrying with webpage') + self.report_warning(f'{e}; trying with webpage') - # If we only call once, we get a 403 when downlaoding the video. 
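# The double-download workaround removed here is superseded by _real_initialize above, # which primes the session once with a HEAD request to https://www.tiktok.com.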
- self._download_webpage(url, video_id) - webpage = self._download_webpage(url, video_id, note='Downloading video webpage') + url = self._create_url(user_id, video_id) + webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'}) next_data = self._search_nextjs_data(webpage, video_id, default='{}') - if next_data: status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0 video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict) else: - sigi_json = self._search_regex( - r'>\s*window\[[\'"]SIGI_STATE[\'"]\]\s*=\s*(?P<sigi_state>{.+});', - webpage, 'sigi data', group='sigi_state') - sigi_data = self._parse_json(sigi_json, video_id) + sigi_data = self._get_sigi_state(webpage, video_id) status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0 video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict) @@ -841,7 +879,7 @@ class DouyinIE(TikTokIE): try: return self._extract_aweme_app(video_id) except ExtractorError as e: - self.report_warning(f'{e}; Retrying with webpage') + self.report_warning(f'{e}; trying with webpage') webpage = self._download_webpage(url, video_id) render_data_json = self._search_regex( diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index c049025a3..d43411928 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -38,7 +38,7 @@ class TrovoBaseIE(InfoExtractor): return { 'uploader': streamer_info.get('nickName'), 'uploader_id': str_or_none(streamer_info.get('uid')), - 'uploader_url': format_field(username, template='https://trovo.live/%s'), + 'uploader_url': format_field(username, None, 'https://trovo.live/%s'), } diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index b04575bd5..cebd027c8 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -54,9 +54,24 @@ class TVerIE(InfoExtractor): video_id = self._match_id(self._search_regex( (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), webpage, 'url regex')) + + episode_info = self._download_json( + f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', + video_id, fatal=False, + query={ + 'platform_uid': self._PLATFORM_UID, + 'platform_token': self._PLATFORM_TOKEN, + }, headers={ + 'x-tver-platform-type': 'web' + }) + episode_content = traverse_obj( + episode_info, ('result', 'episode', 'content')) or {} + video_info = self._download_json( f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, - query={'v': '5'}, headers={ + query={ + 'v': str_or_none(episode_content.get('version')) or '5', + }, headers={ 'Origin': 'https://tver.jp', 'Referer': 'https://tver.jp/', }) @@ -67,25 +82,13 @@ class TVerIE(InfoExtractor): if not r_id.isdigit(): r_id = f'ref:{r_id}' - additional_info = self._download_json( - f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', - video_id, fatal=False, - query={ - 'platform_uid': self._PLATFORM_UID, - 'platform_token': self._PLATFORM_TOKEN, - }, headers={ - 'x-tver-platform-type': 'web' - }) - - additional_content_info = traverse_obj( - additional_info, ('result', 'episode', 'content'), get_all=False) or {} - episode = strip_or_none(additional_content_info.get('title')) - series = 
str_or_none(additional_content_info.get('seriesTitle')) + episode = strip_or_none(episode_content.get('title')) + series = str_or_none(episode_content.get('seriesTitle')) title = ( join_nonempty(series, episode, delim=' ') or str_or_none(video_info.get('title'))) - provider = str_or_none(additional_content_info.get('productionProviderName')) - onair_label = str_or_none(additional_content_info.get('broadcastDateLabel')) + provider = str_or_none(episode_content.get('productionProviderName')) + onair_label = str_or_none(episode_content.get('broadcastDateLabel')) return { '_type': 'url_transparent', diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index af6750333..d516aafa2 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -470,7 +470,7 @@ class TwitterIE(TwitterBaseIE): 'uploader': uploader, 'timestamp': unified_timestamp(status.get('created_at')), 'uploader_id': uploader_id, - 'uploader_url': format_field(uploader_id, template='https://twitter.com/%s'), + 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'), 'like_count': int_or_none(status.get('favorite_count')), 'repost_count': int_or_none(status.get('retweet_count')), 'comment_count': int_or_none(status.get('reply_count')), diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index d35cd0d43..1dc2dbdc4 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -1,16 +1,12 @@ import re +import urllib.request from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_request, - compat_urlparse, -) +from ..compat import compat_HTTPError, compat_str, compat_urlparse from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, float_or_none, int_or_none, js_to_json, @@ -148,14 +144,14 @@ class UdemyIE(InfoExtractor): 'X-Udemy-Snail-Case': 'true', 'X-Requested-With': 'XMLHttpRequest', } - for cookie in self._downloader.cookiejar: + for cookie in self.cookiejar: if cookie.name == 'client_id': headers['X-Udemy-Client-Id'] = cookie.value elif cookie.name == 'access_token': headers['X-Udemy-Bearer-Token'] = cookie.value headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value - if isinstance(url_or_request, compat_urllib_request.Request): + if isinstance(url_or_request, urllib.request.Request): for header, value in headers.items(): url_or_request.add_header(header, value) else: diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py index 296799d38..3f687f737 100644 --- a/yt_dlp/extractor/urort.py +++ b/yt_dlp/extractor/urort.py @@ -1,10 +1,7 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) -from ..utils import ( - unified_strdate, -) +from ..utils import unified_strdate class UrortIE(InfoExtractor): @@ -31,7 +28,7 @@ class UrortIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id) + fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id) json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr songs = self._download_json(json_url, playlist_id) entries = [] diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index bc0187511..825089f47 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -33,10 +33,124 @@ class VevoIE(VevoBaseIE): https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| https?://embed\.vevo\.com/.*?[?&]isrc=| + https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/| vevo:) (?P<id>[^&?#]+)''' - _TESTS = [] + _TESTS = [{ + 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', + 'md5': '95ee28ee45e70130e3ab02b0f579ae23', + 'info_dict': { + 'id': 'GB1101300280', + 'ext': 'mp4', + 'title': 'Hurts - Somebody to Die For', + 'timestamp': 1372057200, + 'upload_date': '20130624', + 'uploader': 'Hurts', + 'track': 'Somebody to Die For', + 'artist': 'Hurts', + 'genre': 'Pop', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'v3 SMIL format', + 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', + 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', + 'info_dict': { + 'id': 'USUV71302923', + 'ext': 'mp4', + 'title': 'Cassadee Pope - I Wish I Could Break Your Heart', + 'timestamp': 1392796919, + 'upload_date': '20140219', + 'uploader': 'Cassadee Pope', + 'track': 'I Wish I Could Break Your Heart', + 'artist': 'Cassadee Pope', + 'genre': 'Country', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'Age-limited video', + 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', + 'info_dict': { + 'id': 'USRV81300282', + 'ext': 'mp4', + 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', + 'age_limit': 18, + 'timestamp': 1372888800, + 'upload_date': '20130703', + 'uploader': 'Justin Timberlake', + 'track': 'Tunnel Vision (Explicit)', + 'artist': 'Justin Timberlake', + 'genre': 'Pop', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'No video_info', + 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', + 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', + 'info_dict': { + 'id': 'USUV71503000', + 'ext': 'mp4', + 'title': 'K Camp ft. T.I. - Till I Die', + 'age_limit': 18, + 'timestamp': 1449468000, + 'upload_date': '20151207', + 'uploader': 'K Camp', + 'track': 'Till I Die', + 'artist': 'K Camp', + 'genre': 'Hip-Hop', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'Featured test', + 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', + 'md5': 'd28675e5e8805035d949dc5cf161071d', + 'info_dict': { + 'id': 'USUV71402190', + 'ext': 'mp4', + 'title': 'Lemaitre ft. 
LoLo - Wait', + 'age_limit': 0, + 'timestamp': 1413432000, + 'upload_date': '20141016', + 'uploader': 'Lemaitre', + 'track': 'Wait', + 'artist': 'Lemaitre', + 'genre': 'Electronic', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'Only available via webpage', + 'url': 'http://www.vevo.com/watch/GBUV71600656', + 'md5': '67e79210613865b66a47c33baa5e37fe', + 'info_dict': { + 'id': 'GBUV71600656', + 'ext': 'mp4', + 'title': 'ABC - Viva Love', + 'age_limit': 0, + 'timestamp': 1461830400, + 'upload_date': '20160428', + 'uploader': 'ABC', + 'track': 'Viva Love', + 'artist': 'ABC', + 'genre': 'Pop', + }, + 'expected_warnings': ['Failed to download video versions info'], + }, { + # no genres available + 'url': 'http://www.vevo.com/watch/INS171400764', + 'only_matching': True, + }, { + # Another case available only via the webpage; using streams/streamsV3 formats + # Geo-restricted to Netherlands/Germany + 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', + 'only_matching': True, + }, { + 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', + 'only_matching': True, + }, { + 'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550', + 'only_matching': True, + }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions 1: 'level3', @@ -138,6 +252,7 @@ class VevoIE(VevoBaseIE): fatal=False)) else: m = re.search(r'''(?xi) + _(?P<quality>[a-z0-9]+) _(?P<width>[0-9]+)x(?P<height>[0-9]+) _(?P<vcodec>[a-z0-9]+) _(?P<vbr>[0-9]+) @@ -149,7 +264,7 @@ class VevoIE(VevoBaseIE): formats.append({ 'url': version_url, - 'format_id': 'http-%s-%s' % (version, video_version['quality']), + 'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}', 'vcodec': m.group('vcodec'), 'acodec': m.group('acodec'), 'vbr': int(m.group('vbr')), diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 251eb78fe..9b05c86a5 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -1,8 +1,10 @@ import random import re import string +import struct from .common import InfoExtractor +from ..compat import compat_b64decode, compat_ord from ..utils import ( ExtractorError, int_or_none, @@ -14,11 +16,6 @@ from ..utils import ( xpath_element, xpath_text, ) -from ..compat import ( - compat_b64decode, - compat_ord, - compat_struct_pack, -) class VideaIE(InfoExtractor): @@ -102,7 +99,7 @@ class VideaIE(InfoExtractor): j = (j + S[i]) % 256 S[i], S[j] = S[j], S[i] k = S[(S[i] + S[j]) % 256] - res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m])) + res += struct.pack('B', k ^ compat_ord(cipher_text[m])) return res.decode() diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 906412f08..679574bd7 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -6,14 +6,18 @@ from ..utils import ExtractorError class VideocampusSachsenIE(InfoExtractor): - IE_NAME = 'Vimp' + IE_NAME = 'ViMP' _INSTANCES = ( + 'bergauf.tv', 'campus.demo.vimp.com', 'corporate.demo.vimp.com', 'dancehalldatabase.com', + 'drehzahl.tv', 'educhannel.hs-gesundheit.de', 'emedia.ls.haw-hamburg.de', 'globale-evolution.net', + 'hohu.tv', + 'htvideos.hightechhigh.org', 'k210039.vimp.mivitec.net', 'media.cmslegal.com', 'media.hs-furtwangen.de', @@ -25,6 +29,7 @@ class VideocampusSachsenIE(InfoExtractor): 'mportal.europa-uni.de', 'pacific.demo.vimp.com', 'slctv.com', 
+ 'streaming.prairiesouth.ca', 'tube.isbonline.cn', 'univideo.uni-kassel.de', 'ursula2.genetics.emory.edu', @@ -52,11 +57,15 @@ class VideocampusSachsenIE(InfoExtractor): 'vimp.weka-fachmedien.de', 'webtv.univ-montp3.fr', 'www.b-tu.de/media', + 'www.bergauf.tv', 'www.bigcitytv.de', 'www.cad-videos.de', + 'www.drehzahl.tv', 'www.fh-bielefeld.de/medienportal', + 'www.hohu.tv', 'www.orvovideo.com', 'www.rwe.tv', + 'www.salzi.tv', 'www.wenglor-media.com', 'www2.univ-sba.dz', ) @@ -73,6 +82,7 @@ class VideocampusSachsenIE(InfoExtractor): 'id': 'e6b9349905c1628631f175712250f2a1', 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7', 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7', + 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg', 'ext': 'mp4', }, }, @@ -82,6 +92,7 @@ class VideocampusSachsenIE(InfoExtractor): 'id': 'fc99c527e4205b121cb7c74433469262', 'title': 'Was ist selbstgesteuertes Lernen?', 'description': 'md5:196aa3b0509a526db62f84679522a2f5', + 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg', 'display_id': 'Was-ist-selbstgesteuertes-Lernen', 'ext': 'mp4', }, @@ -92,6 +103,7 @@ class VideocampusSachsenIE(InfoExtractor): 'id': '09d4ed029002eb1bdda610f1103dd54c', 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht', 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58', + 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg', 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht', 'ext': 'mp4', }, @@ -103,6 +115,7 @@ class VideocampusSachsenIE(InfoExtractor): 'id': '0183356e41af7bfb83d7667b20d9b6a3', 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22', 'description': 'md5:508958bd93e0ca002ac731d94182a54f', + 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg', 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', 'ext': 'mp4', } @@ -113,6 +126,7 @@ class VideocampusSachsenIE(InfoExtractor): 'id': 'c8816f1cc942c12b6cce57c835cffd7c', 'title': 'Preisverleihung »Produkte des Jahres 2022«', 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3', + 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png', 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022', 'ext': 'mp4', }, @@ -124,7 +138,7 @@ class VideocampusSachsenIE(InfoExtractor): 'title': 'Was ist selbstgesteuertes Lernen?', 'ext': 'mp4', }, - } + }, ] def _real_extract(self, url): @@ -139,12 +153,14 @@ class VideocampusSachsenIE(InfoExtractor): if not (display_id or tmp_id): # Title, description from embedded page's meta wouldn't be correct - title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False) + title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False) description = None + thumbnail = None else: title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False) description = self._html_search_meta( - ('og:description', 'twitter:description', 'description'), webpage, default=None) + ('og:description', 'twitter:description', 'description'), webpage, fatal=False) + thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False) formats, subtitles = [], {} try: @@ -162,7 +178,8 @@ class VideocampusSachsenIE(InfoExtractor): 'id': video_id, 'title': 
title, 'description': description, + 'thumbnail': thumbnail, 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 599996bf9..8092d340e 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -152,7 +152,7 @@ class VidioIE(VidioBaseIE): 'uploader': user.get('name'), 'timestamp': parse_iso8601(video.get('created_at')), 'uploader_id': username, - 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'), + 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'), 'channel': channel.get('name'), 'channel_id': str_or_none(channel.get('id')), 'view_count': get_count('view_count'), @@ -283,5 +283,5 @@ class VidioLiveIE(VidioBaseIE): 'uploader': user.get('name'), 'timestamp': parse_iso8601(stream_meta.get('start_time')), 'uploader_id': username, - 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'), + 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'), } diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index b9845affd..69a75304e 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -100,7 +100,7 @@ class VidLiiIE(InfoExtractor): uploader = self._search_regex( r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)', webpage, 'uploader', fatal=False) - uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s') + uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s') upload_date = unified_strdate(self._html_search_meta( 'datePublished', webpage, default=None) or self._search_regex( diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 59c5353ab..961734345 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -40,6 +40,18 @@ class VimeoBaseInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = False _LOGIN_URL = 'https://vimeo.com/log_in' + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + + def _unsmuggle_headers(self, url): + """@returns (url, smuggled_data, headers)""" + url, data = unsmuggle_url(url, {}) + headers = self.get_param('http_headers').copy() + if 'http_headers' in data: + headers.update(data['http_headers']) + return url, data, headers + def _perform_login(self, username, password): webpage = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -718,10 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor): ] @staticmethod - def _smuggle_referrer(url, referrer_url): - return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) - - @staticmethod def _extract_urls(url, webpage): urls = [] # Look for embedded (iframe) Vimeo player @@ -754,8 +762,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'Content-Type': 'application/x-www-form-urlencoded', }) checked = self._download_json( - url + '/check-password', video_id, - 'Verifying the password', data=data, headers=headers) + f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password', + video_id, 'Verifying the password', data=data, headers=headers) if checked is False: raise ExtractorError('Wrong video password', expected=True) return checked @@ -830,10 +838,7 @@ class VimeoIE(VimeoBaseInfoExtractor): raise def _real_extract(self, url): - url, data = unsmuggle_url(url, {}) - headers = self.get_param('http_headers').copy() - if 'http_headers' in data: - 
headers.update(data['http_headers']) + url, data, headers = self._unsmuggle_headers(url) if 'Referer' not in headers: headers['Referer'] = url @@ -1383,14 +1388,15 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' @staticmethod - def _extract_url(webpage): + def _extract_url(url, webpage): mobj = re.search( r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage) - return unescapeHTML(mobj.group(1)) if mobj else None + return VimeoIE._smuggle_referrer(unescapeHTML(mobj.group(1)), url) if mobj else None def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + url, _, headers = self._unsmuggle_headers(url) + webpage = self._download_webpage(url, video_id, headers=headers) config_url = self._parse_json(self._search_regex( r'window\.OTTData\s*=\s*({.+})', webpage, 'ott data'), video_id, js_to_json)['config_url'] diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index bbf43a83f..947f5cdb6 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -89,7 +89,7 @@ class VineIE(InfoExtractor): username = data.get('username') - alt_title = format_field(username, template='Vine by %s') + alt_title = format_field(username, None, 'Vine by %s') return { 'id': video_id, diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index e4570a03a..feab79138 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,3 +1,5 @@ +import itertools + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -9,8 +11,6 @@ from ..utils import ( unsmuggle_url, ) -import itertools - class VoicyBaseIE(InfoExtractor): def _extract_from_playlist_data(self, value): @@ -105,7 +105,7 @@ class VoicyChannelIE(VoicyBaseIE): @classmethod def suitable(cls, url): - return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url) + return not VoicyIE.suitable(url) and super().suitable(url) def _entries(self, channel_id): pager = '' diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 35662753e..0b9bf2903 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -1,17 +1,14 @@ import base64 -import json import hashlib import hmac +import json import random import string import time +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urllib_parse_urlencode, - compat_urllib_parse, -) +from ..compat import compat_HTTPError, compat_urllib_parse_urlencode from ..utils import ( ExtractorError, float_or_none, @@ -46,12 +43,12 @@ class VRVBaseIE(InfoExtractor): headers['Content-Type'] = 'application/json' base_string = '&'.join([ 'POST' if data else 'GET', - compat_urllib_parse.quote(base_url, ''), - compat_urllib_parse.quote(encoded_query, '')]) + urllib.parse.quote(base_url, ''), + urllib.parse.quote(encoded_query, '')]) oauth_signature = base64.b64encode(hmac.new( (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), base_string.encode(), hashlib.sha1).digest()).decode() - encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') + encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '') try: return self._download_json( '?'.join([base_url, encoded_query]), video_id, diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index 8ef75d30e..fd5226bbc 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -1,11 +1,7 @@ import re 
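Stepping back to the vrv.py hunk above: the compat cleanup leaves the OAuth 1.0-style request signing untouched, and it is worth seeing in isolation. A rough standalone sketch of the same scheme (base string is METHOD, quoted URL and quoted query joined by '&'; HMAC-SHA1 keyed by 'consumer_secret&token_secret'); sign_request and the example values are illustrative, not the extractor's API:

    import base64
    import hashlib
    import hmac
    import urllib.parse

    def sign_request(method, base_url, encoded_query, oauth_secret, token_secret=''):
        # OAuth 1.0-style signature base string, with everything percent-encoded.
        base_string = '&'.join([
            method,
            urllib.parse.quote(base_url, ''),
            urllib.parse.quote(encoded_query, ''),
        ])
        # Signing key is "consumer_secret&token_secret"; the token part may be empty.
        key = ('%s&%s' % (oauth_secret, token_secret)).encode('ascii')
        signature = base64.b64encode(
            hmac.new(key, base_string.encode(), hashlib.sha1).digest()).decode()
        return encoded_query + '&oauth_signature=' + urllib.parse.quote(signature, '')

    # e.g. sign_request('GET', 'https://api.example.com/core/index', 'oauth_nonce=abc', 'secret')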
from .common import InfoExtractor -from ..compat import compat_chr -from ..utils import ( - decode_packed_codes, - ExtractorError, -) +from ..utils import ExtractorError, decode_packed_codes class VShareIE(InfoExtractor): @@ -37,7 +33,7 @@ class VShareIE(InfoExtractor): digits = [int(digit) for digit in digits.split(',')] key_digit = self._search_regex( r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') - chars = [compat_chr(d - int(key_digit)) for d in digits] + chars = [chr(d - int(key_digit)) for d in digits] return ''.join(chars) def _real_extract(self, url): diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 6349e5326..e1062b9b5 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -20,7 +20,7 @@ class WPPilotBaseIE(InfoExtractor): def _get_channel_list(self, cache=True): if cache is True: - cache_res = self._downloader.cache.load('wppilot', 'channel-list') + cache_res = self.cache.load('wppilot', 'channel-list') if cache_res: return cache_res, True webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage') @@ -35,7 +35,7 @@ class WPPilotBaseIE(InfoExtractor): channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes']) if channel_list is None: continue - self._downloader.cache.store('wppilot', 'channel-list', channel_list) + self.cache.store('wppilot', 'channel-list', channel_list) return channel_list, False raise ExtractorError('Unable to find the channel list') @@ -101,7 +101,7 @@ class WPPilotIE(WPPilotBaseIE): channel = self._get_channel(video_id) video_id = str(channel['id']) - is_authorized = next((c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None) + is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None) # cookies starting with "g:" are assigned to guests is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py index 28b6ecb6e..63abe4a1f 100644 --- a/yt_dlp/extractor/xfileshare.py +++ b/yt_dlp/extractor/xfileshare.py @@ -1,11 +1,10 @@ import re from .common import InfoExtractor -from ..compat import compat_chr from ..utils import ( + ExtractorError, decode_packed_codes, determine_ext, - ExtractorError, int_or_none, js_to_json, urlencode_postdata, @@ -32,11 +31,11 @@ def aa_decode(aa_code): aa_char = aa_char.replace('+ ', '') m = re.match(r'^\d+', aa_char) if m: - ret += compat_chr(int(m.group(0), 8)) + ret += chr(int(m.group(0), 8)) else: m = re.match(r'^u([\da-f]+)', aa_char) if m: - ret += compat_chr(int(m.group(1), 16)) + ret += chr(int(m.group(1), 16)) return ret diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index ff15d3707..e42eed7d8 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -21,7 +21,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)' + _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)' _VALID_URL = r'''(?x) https?:// (?:.+?\.)?%s/ @@ -32,7 +32,7 @@ class XHamsterIE(InfoExtractor): ''' % _DOMAINS _TESTS = [{ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', - 'md5': '98b4687efb1ffd331c4197854dc09e8f', + 'md5': '34e1ab926db5dc2750fed9e1f34304bb', 'info_dict': { 'id': '1509445', 'display_id': 'femaleagent-shy-beauty-takes-the-bait', @@ -41,6 +41,7 @@ class XHamsterIE(InfoExtractor): 
'timestamp': 1350194821, 'upload_date': '20121014', 'uploader': 'Ruseful2011', + 'uploader_id': 'ruseful2011', 'duration': 893, 'age_limit': 18, }, @@ -70,6 +71,7 @@ class XHamsterIE(InfoExtractor): 'timestamp': 1454948101, 'upload_date': '20160208', 'uploader': 'parejafree', + 'uploader_id': 'parejafree', 'duration': 72, 'age_limit': 18, }, @@ -115,6 +117,9 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx', 'only_matching': True, + }, { + 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf', + 'only_matching': True, }] def _real_extract(self, url): @@ -244,7 +249,6 @@ class XHamsterIE(InfoExtractor): categories = None uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL'])) - return { 'id': video_id, 'display_id': display_id, @@ -263,7 +267,7 @@ class XHamsterIE(InfoExtractor): 'dislike_count': int_or_none(try_get( video, lambda x: x['rating']['dislikes'], int)), 'comment_count': int_or_none(video.get('views')), - 'age_limit': age_limit, + 'age_limit': age_limit if age_limit is not None else 18, 'categories': categories, 'formats': formats, } @@ -423,6 +427,9 @@ class XHamsterUserIE(InfoExtractor): 'id': 'firatkaan', }, 'playlist_mincount': 1, + }, { + 'url': 'https://xhday.com/users/mobhunter', + 'only_matching': True, }] def _entries(self, user_id): diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 3fe6192bf..8811df6d8 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -1,15 +1,15 @@ import hashlib import itertools import re +import urllib.parse +from .brightcove import BrightcoveNewIE from .common import InfoExtractor, SearchInfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse, -) +from .youtube import YoutubeIE +from ..compat import compat_str from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, mimetype2ext, parse_iso8601, @@ -18,9 +18,6 @@ from ..utils import ( url_or_none, ) -from .brightcove import BrightcoveNewIE -from .youtube import YoutubeIE - class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' @@ -333,7 +330,7 @@ class YahooSearchIE(SearchInfoExtractor): def _search_results(self, query): for pagenum in itertools.count(0): - result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) + result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30) info = self._download_json(result_url, query, note='Downloading results page ' + str(pagenum + 1)) yield from (self.url_result(result['rurl']) for result in info['results']) @@ -434,7 +431,7 @@ class YahooGyaOIE(InfoExtractor): page = 1 while True: playlist = self._download_json( - f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}', program_id, + f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id, note=f'Downloading JSON metadata page {page}') if not playlist: break diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py index 444785947..27eda9721 100644 --- a/yt_dlp/extractor/ynet.py +++ b/yt_dlp/extractor/ynet.py @@ -1,8 +1,8 @@ -import re import json +import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote_plus class YnetIE(InfoExtractor): @@ -31,7 +31,7 @@ class YnetIE(InfoExtractor): video_id = self._match_id(url) webpage = 
self._download_webpage(url, video_id) - content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage)) + content = urllib.parse.unquote_plus(self._og_search_video_url(webpage)) config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) f4m_url = config['clip']['url'] title = self._og_search_title(webpage) diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 76d89f3ce..18112ba35 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -91,7 +91,7 @@ def _extract_moment(item, fatal=True): uploader = try_get(item, lambda x: x['owner']['name'], compat_str) uploader_id = try_get(item, lambda x: x['owner']['userId']) - uploader_url = format_field(uploader, template='https://www.younow.com/%s') + uploader_url = format_field(uploader, None, 'https://www.younow.com/%s') entry = { 'extractor_key': 'YouNowMoment', diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 5aea82295..b484e08ec 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -135,9 +135,10 @@ class YouPornIE(InfoExtractor): r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - [r'UPLOADED:\s*<span>([^<]+)', + (r'UPLOADED:\s*<span>([^<]+)', r'Date\s+[Aa]dded:\s*<span>([^<]+)', - r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'], + r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''', + r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'), webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5546aa9a3..ebc3381a2 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2,7 +2,6 @@ import base64 import calendar import copy import datetime -import functools import hashlib import itertools import json @@ -14,18 +13,11 @@ import sys import threading import time import traceback +import urllib.error +import urllib.parse from .common import InfoExtractor, SearchInfoExtractor -from ..compat import ( - compat_chr, - compat_HTTPError, - compat_parse_qs, - compat_str, - compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, - compat_urlparse, -) +from ..compat import functools from ..jsinterp import JSInterpreter from ..utils import ( NO_DEFAULT, @@ -382,11 +374,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): pref = {} if pref_cookie: try: - pref = dict(compat_urlparse.parse_qsl(pref_cookie.value)) + pref = dict(urllib.parse.parse_qsl(pref_cookie.value)) except ValueError: self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) pref.update({'hl': 'en', 'tz': 'UTC'}) - self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref)) + self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref)) def _real_initialize(self): self._initialize_pref() @@ -397,9 +389,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if self._LOGIN_REQUIRED and not self._cookies_passed: self.raise_login_required('Login details are needed to download this content', method='cookies') - _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' - _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' - _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' + _YT_INITIAL_DATA_RE = 
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' + _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' def _get_default_ytcfg(self, client='web'): return copy.deepcopy(INNERTUBE_CLIENTS[client]) @@ -415,15 +406,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_client_name(self, ytcfg, default_client='web'): return self._ytcfg_get_safe( ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], - lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client) + lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client) def _extract_client_version(self, ytcfg, default_client='web'): return self._ytcfg_get_safe( ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], - lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client) + lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client) + + def _select_api_hostname(self, req_api_hostname, default_client=None): + return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] + or req_api_hostname or self._get_innertube_host(default_client or 'web')) def _extract_api_key(self, ytcfg=None, default_client='web'): - return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) + return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client) def _extract_context(self, ytcfg=None, default_client='web'): context = get_first( @@ -470,18 +465,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): real_headers.update({'content-type': 'application/json'}) if headers: real_headers.update(headers) + api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0] + or api_key or self._extract_api_key(default_client=default_client)) return self._download_json( - f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', + f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) + query={'key': api_key, 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): - data = self._search_regex( - (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}', - self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) - if data: - return self._parse_json(data, item_id, fatal=fatal) + return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) @staticmethod def _extract_session_index(*data): @@ -497,7 +490,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # Deprecated? 
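For orientation, the _call_api path being reworked above reduces to a JSON POST against an InnerTube endpoint, with the API key and prettyPrint=false as query parameters and the client context in the body. A bare-bones equivalent, assuming you already hold a valid key and context from a real ytcfg; the placeholder payload below is illustrative:

    import json
    import urllib.request

    def call_innertube(ep, payload, api_key, host='www.youtube.com'):
        # The host and key defaults here mirror what the extractor derives from
        # ytcfg or the innertube_host/innertube_key extractor arguments.
        url = f'https://{host}/youtubei/v1/{ep}?key={api_key}&prettyPrint=false'
        req = urllib.request.Request(
            url, data=json.dumps(payload).encode('utf8'),
            headers={'Content-Type': 'application/json'})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)

    # payload is e.g. {'context': {'client': {'clientName': 'WEB', ...}}, 'videoId': ...}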
def _extract_identity_token(self, ytcfg=None, webpage=None): if ytcfg: - token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) + token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if token: return token if webpage: @@ -513,12 +506,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """ for data in args: # ytcfg includes channel_syncid if on secondary channel - delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str) + delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str) if delegated_sid: return delegated_sid sync_ids = (try_get( data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], - lambda x: x['DATASYNC_ID']), compat_str) or '').split('||') + lambda x: x['DATASYNC_ID']), str) or '').split('||') if len(sync_ids) >= 2 and sync_ids[1]: # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel # and just "user_syncid||" for primary channel. We only want the channel_syncid @@ -534,7 +527,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))], expected_type=str) - @property + @functools.cached_property def is_authenticated(self): return bool(self._generate_sapisidhash_header()) @@ -550,9 +543,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self, *, ytcfg=None, account_syncid=None, session_index=None, visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): - origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client)) + origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) headers = { - 'X-YouTube-Client-Name': compat_str( + 'X-YouTube-Client-Name': str( self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), 'Origin': origin, @@ -612,7 +605,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_continuation_ep_data(cls, continuation_ep: dict): if isinstance(continuation_ep, dict): continuation = try_get( - continuation_ep, lambda x: x['continuationCommand']['token'], compat_str) + continuation_ep, lambda x: x['continuationCommand']['token'], str) if not continuation: return ctp = continuation_ep.get('clickTrackingParams') @@ -672,7 +665,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_badges(self, renderer: dict): badges = set() for badge in try_get(renderer, lambda x: x['badges'], list) or []: - label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str) + label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str) if label: badges.add(label.lower()) return badges @@ -687,7 +680,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if not any(key is ... 
or isinstance(key, (list, tuple)) for key in variadic(path)): obj = [obj] for item in obj: - text = try_get(item, lambda x: x['simpleText'], compat_str) + text = try_get(item, lambda x: x['simpleText'], str) if text: return text runs = try_get(item, lambda x: x['runs'], list) or [] @@ -789,20 +782,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor): note='%s%s' % (note, ' (retry #%d)' % count if count else '')) except ExtractorError as e: if isinstance(e.cause, network_exceptions): - if isinstance(e.cause, compat_HTTPError): + if isinstance(e.cause, urllib.error.HTTPError): first_bytes = e.cause.read(512) if not is_html(first_bytes): yt_error = try_get( self._parse_json( self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), - lambda x: x['error']['message'], compat_str) + lambda x: x['error']['message'], str) if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 - if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): + if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): last_error = error_to_compat_str(e.cause or e.msg) if count < retries: continue @@ -2212,28 +2205,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, { # Story. Requires specific player params to work. # Note: stories get removed after some period of time - 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA', + 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI', 'info_dict': { - 'id': 'yN3x1t3sieA', + 'id': 'vv8qTUWmulI', 'ext': 'mp4', - 'uploader': 'Linus Tech Tips', - 'duration': 13, - 'channel': 'Linus Tech Tips', + 'availability': 'unlisted', + 'view_count': int, + 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA', + 'upload_date': '20220526', + 'categories': ['Education'], + 'title': 'Story', + 'channel': 'IT\'S HISTORY', + 'description': '', + 'uploader_id': 'BlastfromthePast', + 'duration': 12, + 'uploader': 'IT\'S HISTORY', 'playable_in_embed': True, - 'tags': [], 'age_limit': 0, - 'uploader_url': 'http://www.youtube.com/user/LinusTechTips', - 'upload_date': '20220402', - 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp', - 'title': 'Story', 'live_status': 'not_live', - 'uploader_id': 'LinusTechTips', + 'tags': [], + 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp', + 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast', + 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA', + } + }, { + 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', + 'info_dict': { + 'id': 'tjjjtzRLHvA', + 'ext': 'mp4', + 'title': 'ハッシュタグ無し };if window.ytcsi', + 'upload_date': '20220323', + 'like_count': int, + 'availability': 'unlisted', + 'channel': 'nao20010128nao', + 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp', + 'age_limit': 0, + 'uploader': 'nao20010128nao', + 'uploader_id': 'nao20010128nao', + 'categories': ['Music'], 'view_count': int, 'description': '', - 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw', - 'categories': ['Science & Technology'], - 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw', - 'availability': 'unlisted', + 'channel_url': 
'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A', + 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel_follower_count': int, + 'duration': 6, + 'tags': [], + 'uploader_url': 'http://www.youtube.com/user/nao20010128nao', } } ] @@ -2319,7 +2338,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Obtain from MPD's maximum seq value old_mpd_url = mpd_url last_error = ctx.pop('last_error', None) - expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403 + expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) or (mpd_url, stream_number, False)) if not refresh_sequence: @@ -2386,6 +2405,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx) yield { 'url': last_segment_url, + 'fragment_count': last_seq, } if known_idx == last_seq: no_fragment_score += 5 @@ -2400,7 +2420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_player_url(self, *ytcfgs, webpage=None): player_url = traverse_obj( ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'), - get_all=False, expected_type=compat_str) + get_all=False, expected_type=str) if not player_url: return return urljoin('https://www.youtube.com', player_url) @@ -2417,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _signature_cache_id(self, example_sig): """ Return a string representation of a signature """ - return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) + return '.'.join(str(len(part)) for part in example_sig.split('.')) @classmethod def _extract_player_info(cls, player_url): @@ -2447,7 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}' assert os.path.basename(func_id) == func_id - cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) + cache_spec = self.cache.load('youtube-sigfuncs', func_id) if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) @@ -2455,11 +2475,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if code: res = self._parse_sig_js(code) - test_string = ''.join(map(compat_chr, range(len(example_sig)))) + test_string = ''.join(map(chr, range(len(example_sig)))) cache_res = res(test_string) cache_spec = [ord(c) for c in cache_res] - self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) + self.cache.store('youtube-sigfuncs', func_id, cache_spec) return res def _print_sig_code(self, func, example_sig): @@ -2494,12 +2514,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: yield _genslice(start, i, step) - test_string = ''.join(map(compat_chr, range(len(example_sig)))) + test_string = ''.join(map(chr, range(len(example_sig)))) cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] expr_code = ' + '.join(gen_sig_code(cache_spec)) signature_id_tuple = '(%s)' % ( - ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) + ', '.join(str(len(p)) for p in example_sig.split('.'))) code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' ' return %s\n') % (signature_id_tuple, expr_code) self.to_screen('Extracted signature function:\n' + code) @@ -2530,22 +2550,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _decrypt_signature(self, s, video_id, player_url): """Turn the encrypted s field into a working 
signature""" - - if player_url is None: - raise ExtractorError('Cannot decrypt signature without player_url') - try: player_id = (player_url, self._signature_cache_id(s)) if player_id not in self._player_cache: - func = self._extract_signature_function( - video_id, player_url, s - ) + func = self._extract_signature_function(video_id, player_url, s) self._player_cache[player_id] = func func = self._player_cache[player_id] self._print_sig_code(func, s) return func(s) except Exception as e: - raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e) + raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id) def _decrypt_nsig(self, s, video_id, player_url): """Turn the encrypted n field into a working signature""" @@ -2580,7 +2594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self._downloader.cache.load('youtube-nsig', player_id) + func_code = self.cache.load('youtube-nsig', player_id) if func_code: jsi = JSInterpreter(func_code) @@ -2589,7 +2603,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._extract_n_function_name(jscode) jsi = JSInterpreter(jscode) func_code = jsi.extract_function_code(funcname) - self._downloader.cache.store('youtube-nsig', player_id, func_code) + self.cache.store('youtube-nsig', player_id, func_code) if self.get_param('youtube_print_sig_code'): self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') @@ -2621,30 +2635,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return sts def _mark_watched(self, video_id, player_responses): - playback_url = get_first( - player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), - expected_type=url_or_none) - if not playback_url: - self.report_warning('Unable to mark watched') - return - parsed_playback_url = compat_urlparse.urlparse(playback_url) - qs = compat_urlparse.parse_qs(parsed_playback_url.query) + for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')): + label = 'fully ' if is_full else '' + url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'), + expected_type=url_or_none) + if not url: + self.report_warning(f'Unable to mark {label}watched') + return + parsed_url = urllib.parse.urlparse(url) + qs = urllib.parse.parse_qs(parsed_url.query) + + # cpn generation algorithm is reverse engineered from base.js. + # In fact it works even with dummy cpn. + CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + + # # more consistent results setting it to right before the end + video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)] + + qs.update({ + 'ver': ['2'], + 'cpn': [cpn], + 'cmt': video_length, + 'el': 'detailpage', # otherwise defaults to "shorts" + }) - # cpn generation algorithm is reverse engineered from base.js. - # In fact it works even with dummy cpn. 
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + if is_full: + # these seem to mark watchtime "history" in the real world + # they're required, so send in a single value + qs.update({ + 'st': video_length, + 'et': video_length, + }) - qs.update({ - 'ver': ['2'], - 'cpn': [cpn], - }) - playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + url = urllib.parse.urlunparse( + parsed_url._replace(query=urllib.parse.urlencode(qs, True))) - self._download_webpage( - playback_url, video_id, 'Marking watched', - 'Unable to mark watched', fatal=False) + self._download_webpage( + url, video_id, f'Marking {label}watched', + 'Unable to mark watched', fatal=False) @staticmethod def _extract_urls(webpage): @@ -2713,39 +2742,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription')) chapter_title = lambda chapter: self._get_text(chapter, 'title') - return next(( - filter(None, ( - self._extract_chapters( - traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), - chapter_time, chapter_title, duration) - for contents in content_list - ))), []) - - def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration): - chapters = [] - last_chapter = {'start_time': 0} - for idx, chapter in enumerate(chapter_list or []): - title = chapter_title(chapter) - start_time = chapter_time(chapter) - if start_time is None: - continue - last_chapter['end_time'] = start_time - if start_time < last_chapter['start_time']: - if idx == 1: - chapters.pop() - self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title']) - else: - self.report_warning(f'Invalid start time for chapter "{title}"') - continue - last_chapter = {'start_time': start_time, 'title': title} - chapters.append(last_chapter) - last_chapter['end_time'] = duration - return chapters + return next(filter(None, ( + self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), + chapter_time, chapter_title, duration) + for contents in content_list)), []) - def _extract_yt_initial_variable(self, webpage, regex, video_id, name): - return self._parse_json(self._search_regex( - (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', - regex), webpage, name, default='{}'), video_id, fatal=False) + def _extract_chapters_from_description(self, description, duration): + return self._extract_chapters( + re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''), + chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1], + duration=duration, strict=False) + + def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True): + if not duration: + return + chapter_list = [{ + 'start_time': chapter_time(chapter), + 'title': chapter_title(chapter), + } for chapter in chapter_list or []] + if not strict: + chapter_list.sort(key=lambda c: c['start_time'] or 0) + + chapters = [{'start_time': 0, 'title': '<Untitled>'}] + for idx, chapter in enumerate(chapter_list): + if chapter['start_time'] is None or not chapter['title']: + self.report_warning(f'Incomplete chapter {idx}') + elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: + chapters[-1]['end_time'] = chapter['start_time'] + chapters.append(chapter) + else: + self.report_warning(f'Invalid start time for chapter 
"{chapter["title"]}"') + chapters[-1]['end_time'] = duration + return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:] def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') @@ -2758,12 +2786,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText') author = self._get_text(comment_renderer, 'authorText') author_id = try_get(comment_renderer, - lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str) + lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str) votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'], - lambda x: x['likeCount']), compat_str)) or 0 + lambda x: x['likeCount']), str)) or 0 author_thumbnail = try_get(comment_renderer, - lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str) + lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str) author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool) is_favorited = 'creatorHeart' in (try_get( @@ -3028,9 +3056,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: - initial_pr = self._extract_yt_initial_variable( - webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, - video_id, 'initial player response') + initial_pr = self._search_json( + self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) all_clients = set(clients) clients = clients[::-1] @@ -3144,16 +3171,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): fmt_url = fmt.get('url') if not fmt_url: - sc = compat_parse_qs(fmt.get('signatureCipher')) + sc = urllib.parse.parse_qs(fmt.get('signatureCipher')) fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) encrypted_sig = try_get(sc, lambda x: x['s'][0]) - if not (sc and fmt_url and encrypted_sig): + if not all((sc, fmt_url, player_url, encrypted_sig)): continue - if not player_url: + try: + fmt_url += '&%s=%s' % ( + traverse_obj(sc, ('sp', -1)) or 'signature', + self._decrypt_signature(encrypted_sig, video_id, player_url) + ) + except ExtractorError as e: + self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True) + self.write_debug(e, only_once=True) continue - signature = self._decrypt_signature(sc['s'][0], video_id, player_url) - sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' - fmt_url += '&' + sp + '=' + signature query = parse_qs(fmt_url) throttled = False @@ -3164,7 +3195,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): except ExtractorError as e: self.report_warning( 'nsig extraction failed: You may experience throttling for some formats\n' - f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) + f'n = {query["n"][0]} ; player = {player_url}', only_once=True) + self.write_debug(e, only_once=True) throttled = True if itag: @@ -3380,12 +3412,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Unquote should take place before split on comma (,) since textual # fields may contain comma as well (see # https://github.com/ytdl-org/youtube-dl/issues/8536) - feed_data = compat_parse_qs( - compat_urllib_parse_unquote_plus(feed)) + feed_data = urllib.parse.parse_qs( + urllib.parse.unquote_plus(feed)) def feed_entry(name): return try_get( - feed_data, lambda x: x[name][0], compat_str) + feed_data, lambda x: x[name][0], str) feed_id = feed_entry('id') if not feed_id: @@ 
-3414,6 +3446,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or get_first(microformats, 'lengthSeconds') or parse_duration(search_meta('duration'))) or None + if get_first(video_details, 'isPostLiveDvr'): + self.write_debug('Video is in Post-Live Manifestless mode') + if duration or 0 > 4 * 3600: + self.report_warning( + 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. ' + 'This is a known issue and patches are welcome') + live_broadcast_details, is_live, streaming_data, formats = self._list_formats( video_id, microformats, video_details, player_responses, player_url, duration) @@ -3523,7 +3562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 'uploader_url': owner_profile_url, 'channel_id': channel_id, - 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'), + 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'), 'duration': duration, 'view_count': int_or_none( get_first((video_details, microformats), (..., 'viewCount')) @@ -3593,7 +3632,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'translated_subs' in self._configuration_arg('skip'): continue trans_code += f'-{lang_code}' - trans_name += format_field(lang_name, template=' from %s') + trans_name += format_field(lang_name, None, ' from %s') # Add an "-orig" label to the original language so that it can be distinguished. # The subs are returned without "-orig" as well for compatibility if lang_code == f'a-{orig_trans_code}': @@ -3605,9 +3644,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles - parsed_url = compat_urllib_parse_urlparse(url) + parsed_url = urllib.parse.urlparse(url) for component in [parsed_url.fragment, parsed_url.query]: - query = compat_parse_qs(component) + query = urllib.parse.parse_qs(component) for k, v in query.items(): for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: d_k += '_time' @@ -3616,7 +3655,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Youtube Music Auto-generated description if video_description: - mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description) + mobj = re.search( + r'''(?xs) + (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+ + (?P<album>[^\n]+) + (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))? + (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))? + (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))? 
+ .+\nAuto-generated\ by\ YouTube\.\s*$ + ''', video_description) if mobj: release_year = mobj.group('release_year') release_date = mobj.group('release_date') @@ -3634,9 +3681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): initial_data = None if webpage: - initial_data = self._extract_yt_initial_variable( - webpage, self._YT_INITIAL_DATA_RE, video_id, - 'yt initial data') + initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False) if not initial_data: query = {'videoId': video_id} query.update(self._get_checkok_params()) @@ -3646,13 +3691,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') + info['comment_count'] = traverse_obj(initial_data, ( + 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', + 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText' + ), ( + 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', + 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text' + ), expected_type=int_or_none, get_all=False) + try: # This will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] except (KeyError, IndexError, TypeError): pass else: info.setdefault('subtitles', {})['live_chat'] = [{ - 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies + # url is needed to set cookies + 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1', 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay', @@ -3662,6 +3716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): info['chapters'] = ( self._extract_chapters_from_json(initial_data, duration) or self._extract_chapters_from_engagement_panel(initial_data, duration) + or self._extract_chapters_from_description(video_description, duration) or None) contents = traverse_obj( @@ -3884,7 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): # generic endpoint URL support ep_url = urljoin('https://www.youtube.com/', try_get( renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], - compat_str)) + str)) if ep_url: for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): if ie.suitable(ep_url): @@ -3928,7 +3983,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _shelf_entries(self, shelf_renderer, skip_channels=False): ep = try_get( shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], - compat_str) + str) shelf_url = urljoin('https://www.youtube.com', ep) if shelf_url: # Skipping links to another channels, note that checking for @@ -3988,7 +4043,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): yield entry # playlist attachment playlist_id = try_get( - post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str) + post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str) if playlist_id: yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, @@ -3999,7 +4054,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if not isinstance(run, dict): continue ep_url = 
try_get( - run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str) + run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str) if not ep_url: continue if not YoutubeIE.suitable(ep_url): @@ -4015,9 +4070,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return for content in contents: renderer = content.get('backstagePostThreadRenderer') - if not isinstance(renderer, dict): + if isinstance(renderer, dict): + yield from self._post_thread_entries(renderer) continue - yield from self._post_thread_entries(renderer) + renderer = content.get('videoRenderer') + if isinstance(renderer, dict): + yield self._video_entry(renderer) r''' # unused def _rich_grid_entries(self, contents): @@ -4173,10 +4231,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): uploader['uploader'] = self._search_regex( r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) uploader['uploader_id'] = try_get( - owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) + owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str) uploader['uploader_url'] = urljoin( 'https://www.youtube.com/', - try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) + try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)) return {k: v for k, v in uploader.items() if v is not None} def _extract_from_tabs(self, item_id, ytcfg, data, tabs): @@ -4304,13 +4362,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg): title = playlist.get('title') or try_get( - data, lambda x: x['titleText']['simpleText'], compat_str) + data, lambda x: x['titleText']['simpleText'], str) playlist_id = playlist.get('playlistId') or item_id # Delegating everything except mix playlists to regular tab-based playlist URL playlist_url = urljoin(url, try_get( playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], - compat_str)) + str)) # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1] # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg @@ -4381,7 +4439,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue nav_item_renderer = menu_item.get('menuNavigationItemRenderer') text = try_get( - nav_item_renderer, lambda x: x['text']['simpleText'], compat_str) + nav_item_renderer, lambda x: x['text']['simpleText'], str) if not text or text.lower() != 'show unavailable videos': continue browse_endpoint = try_get( @@ -4402,7 +4460,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') - @property + @functools.cached_property def skip_webpage(self): return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) @@ -4423,7 +4481,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): - if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): + if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): last_error = error_to_compat_str(e.cause or e.msg) if count < retries: continue @@ -5236,8 +5294,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data def _real_extract(self, url, smuggled_data): item_id = self._match_id(url) - url = compat_urlparse.urlunparse( - compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')) + url = urllib.parse.urlunparse( + urllib.parse.urlparse(url)._replace(netloc='www.youtube.com')) compat_opts = self.get_param('compat_opts', []) def get_mobj(url): @@ -5257,7 +5315,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): mdata = self._extract_tab_endpoint( f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music') murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), - get_all=False, expected_type=compat_str) + get_all=False, expected_type=str) if not murl: raise ExtractorError('Failed to resolve album to playlist') return self.url_result(murl, ie=YoutubeTabIE.ie_key()) @@ -5622,11 +5680,13 @@ class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor): channel = traverse_obj( notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'), expected_type=str) + notification_title = self._get_text(notification, 'shortMessage') + if notification_title: + notification_title = notification_title.replace('\xad', '') # remove soft hyphens + # TODO: handle recommended videos title = self._search_regex( - rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'), + rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title, 'video title', default=None) - if title: - title = title.replace('\xad', '') # remove soft hyphens upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d') if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()) else None) @@ -5778,7 +5838,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): if params: section = next((k for k, v in self._SECTIONS.items() if v == params), params) else: - section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower() + section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower() params = self._SECTIONS.get(section) if not params: section = None @@ -5925,14 +5985,43 @@ class YoutubeTruncatedURLIE(InfoExtractor): expected=True) -class YoutubeClipIE(InfoExtractor): +class YoutubeClipIE(YoutubeTabBaseInfoExtractor): IE_NAME = 'youtube:clip' - IE_DESC = False # Do not list - _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)' + _TESTS = [{ + # FIXME: Other metadata should be extracted from the clip, not from the base video + 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ', + 'info_dict': { + 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ', + 'ext': 'mp4', + 'section_start': 29.0, + 'section_end': 39.7, + 'duration': 10.7, + } + }] def _real_extract(self, url): - self.report_warning('YouTube clips are not currently supported. 
The entire video will be downloaded instead') - return self.url_result(url, 'Generic') + clip_id = self._match_id(url) + _, data = self._extract_webpage(url, clip_id) + + video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId')) + if not video_id: + raise ExtractorError('Unable to find video ID') + + clip_data = traverse_obj(data, ( + 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer', + 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ..., + 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command', + 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False) + + return { + '_type': 'url_transparent', + 'url': f'https://www.youtube.com/watch?v={video_id}', + 'ie_key': YoutubeIE.ie_key(), + 'id': clip_id, + 'section_start': int(clip_data['startTimeMs']) / 1000, + 'section_end': int(clip_data['endTimeMs']) / 1000, + } class YoutubeTruncatedIDIE(InfoExtractor): diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 16f827a7e..2a7e85472 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -220,7 +220,7 @@ class ZattooPlatformBaseIE(InfoExtractor): 'id': channel_name, 'title': channel_name, 'is_live': True, - 'format': formats, + 'formats': formats, 'subtitles': subtitles } diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index a388ff562..3a7f01f7a 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -69,6 +69,7 @@ class ZDFBaseIE(InfoExtractor): f.update({ 'url': format_url, 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')), + 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)) }) new_formats = [f] formats.extend(merge_dicts(f, { @@ -108,7 +109,7 @@ class ZDFBaseIE(InfoExtractor): 'class': track.get('class'), 'language': track.get('language'), }) - self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference')) + self._sort_formats(formats, ('tbr', 'res', 'quality', 'language_preference')) duration = float_or_none(try_get( ptmd, lambda x: x['attributes']['duration']['value']), scale=1000) @@ -187,7 +188,7 @@ class ZDFIE(ZDFBaseIE): }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', - 'md5': '3d6f1049e9682178a11c54b91f3dd065', + 'md5': '57af4423db0455a3975d2dc4578536bc', 'info_dict': { 'ext': 'mp4', 'id': 'video_funk_1770473', @@ -230,6 +231,19 @@ class ZDFIE(ZDFBaseIE): 'timestamp': 1641355200, 'upload_date': '20220105', }, + 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"' + }, { + 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', + 'info_dict': { + 'id': '191205_1800_sendung_sok8', + 'ext': 'mp4', + 'title': 'Das Geld anderer Leute', + 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', + 'duration': 2581.0, + 'timestamp': 1654790700, + 'upload_date': '20220609', + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', + }, }] def _extract_entry(self, url, player, content, video_id): diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index 70eb3ccd1..d8d259dd6 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -58,7 +58,7 @@ class ZhihuIE(InfoExtractor): 'uploader': author.get('name'), 'timestamp': int_or_none(zvideo.get('published_at')), 'uploader_id': author.get('id'), 
- 'uploader_url': format_field(url_token, template='https://www.zhihu.com/people/%s'), + 'uploader_url': format_field(url_token, None, 'https://www.zhihu.com/people/%s'), 'duration': float_or_none(video.get('duration')), 'view_count': int_or_none(zvideo.get('play_count')), 'like_count': int_or_none(zvideo.get('liked_count')),
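
In the Post-Live check in the youtube.py hunk, operator precedence matters: `duration or 0 > 4 * 3600` would parse as `duration or (0 > 14400)`, which is truthy for any nonzero duration, so the warning would fire even for short videos. The parenthesized form compares the (possibly missing) duration against four hours. A quick sketch:

    duration = 120  # hypothetical: a two-minute video
    assert (duration or 0 > 4 * 3600) == 120       # comparison binds tighter than `or`
    assert ((duration or 0) > 4 * 3600) is False   # intended: only warn past 4 hours
    duration = None
    assert ((duration or 0) > 4 * 3600) is False   # `or 0` guards the None case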
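
The recurring `format_field(x, template=...)` → `format_field(x, None, ...)` changes in the youtube.py and zhihu.py hunks pass the template positionally, which requires spelling out `field` (where `None` means "format the object itself"). A minimal sketch of the call convention, assuming the `yt_dlp.utils.format_field` signature of this era, roughly `format_field(obj, field=None, template='%s', ignore=(None, ''), default='')`:

    from yt_dlp.utils import format_field

    channel_id = 'UC1234'  # hypothetical value
    # A present value is interpolated into the template:
    assert format_field(channel_id, None, 'https://www.youtube.com/channel/%s') \
        == 'https://www.youtube.com/channel/UC1234'
    # A missing value collapses to '' instead of raising, so callers
    # need no explicit None checks:
    assert format_field(None, None, 'https://www.youtube.com/channel/%s') == ''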
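
The new `comment_count` extraction in youtube.py leans on `traverse_obj` path branching: `...` fans out over every list element, a callable key acts as a filter on branches, `get_all=False` returns the first surviving match, and `expected_type` coerces (or drops) it. A minimal sketch against a hypothetical, heavily trimmed `initial_data` (the real response nests much deeper):

    from yt_dlp.utils import int_or_none, traverse_obj

    initial_data = {'engagementPanels': [{  # hypothetical shape
        'engagementPanelSectionListRenderer': {
            'panelIdentifier': 'comment-item-section',
            'header': {'engagementPanelTitleHeaderRenderer': {
                'contextualInfo': {'runs': [{'text': '1234'}]}}}}}]}

    comment_count = traverse_obj(initial_data, (
        'engagementPanels',  # a list: the lambda filters its items
        lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer',
        'contextualInfo', 'runs', ..., 'text'),  # `...` visits every run
        expected_type=int_or_none, get_all=False)
    assert comment_count == 1234  # expected_type coerced the string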
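
The rewritten YoutubeClipIE delegates extraction to YoutubeIE through a `url_transparent` result: the clip page only yields the underlying video ID plus the scrub window, and fields set by the clip extractor override those extracted from the watch page (hence the FIXME in its test about other metadata coming from the base video). A rough sketch of the merge semantics, not the literal `YoutubeDL.process_ie_result` code:

    def resolve_url_transparent(outer, extract):
        # Simplified: extract the wrapped URL, then let the outer result's
        # explicit fields win over the inner video's metadata.
        inner = extract(outer['url'])
        overrides = {k: v for k, v in outer.items()
                     if k not in ('_type', 'url', 'ie_key') and v is not None}
        return {**inner, **overrides}

    clip = {'_type': 'url_transparent',
            'url': 'https://www.youtube.com/watch?v=xxxxxxxxxxx',  # hypothetical
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'section_start': 29.0, 'section_end': 39.7}
    video = resolve_url_transparent(
        clip, lambda _: {'id': 'xxxxxxxxxxx', 'title': 'Base video'})
    assert video['id'] == 'UgytZKpehg-hEMBSn3F4AaABCQ'
    assert video['title'] == 'Base video'  # inherited from the base video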
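
The zdf.py hunk infers `tbr` from the progressive download URL, on the assumption that ZDF URLs embed the bitrate as a `_<kbps>k_` segment. A small sketch with a made-up URL:

    import re

    from yt_dlp.utils import int_or_none

    format_url = 'https://example.zdf.de/de/220609_sendung_3328k_p36v14.mp4'  # hypothetical
    mobj = re.search(r'_(\d+)k_', format_url)
    tbr = int_or_none(mobj and mobj.group(1))
    assert tbr == 3328  # stays None when the URL has no _<kbps>k_ marker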